mirror of
https://github.com/neondatabase/neon.git
synced 2026-02-11 22:50:37 +00:00
Compare commits
23 Commits
sk-evictio
...
erik/batch
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4308ffe5c0 | ||
|
|
d5435b1a81 | ||
|
|
080d585b22 | ||
|
|
7595d3afe6 | ||
|
|
1ff5333a1b | ||
|
|
d8f5d43549 | ||
|
|
2256a5727a | ||
|
|
3f80af8b1d | ||
|
|
a61d81bbc7 | ||
|
|
05381a48f0 | ||
|
|
cef165818c | ||
|
|
6b19867410 | ||
|
|
cc8029c4c8 | ||
|
|
5be6b07cf1 | ||
|
|
b018bc7da8 | ||
|
|
4b075db7ea | ||
|
|
fde16f8614 | ||
|
|
5a138d08a3 | ||
|
|
2d9652c434 | ||
|
|
e9dcfa2eb2 | ||
|
|
8db84d9964 | ||
|
|
1aab34715a | ||
|
|
f63de5f527 |
1
.github/actionlint.yml
vendored
1
.github/actionlint.yml
vendored
@@ -20,3 +20,4 @@ config-variables:
|
||||
- REMOTE_STORAGE_AZURE_REGION
|
||||
- SLACK_UPCOMING_RELEASE_CHANNEL_ID
|
||||
- DEV_AWS_OIDC_ROLE_ARN
|
||||
- BENCHMARK_INGEST_TARGET_PROJECTID
|
||||
|
||||
11
.github/pull_request_template.md
vendored
11
.github/pull_request_template.md
vendored
@@ -1,14 +1,3 @@
|
||||
## Problem
|
||||
|
||||
## Summary of changes
|
||||
|
||||
## Checklist before requesting a review
|
||||
|
||||
- [ ] I have performed a self-review of my code.
|
||||
- [ ] If it is a core feature, I have added thorough tests.
|
||||
- [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard?
|
||||
- [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section.
|
||||
|
||||
## Checklist before merging
|
||||
|
||||
- [ ] Do not forget to reformat commit message to not include the above checklist
|
||||
|
||||
372
.github/workflows/ingest_benchmark.yml
vendored
Normal file
372
.github/workflows/ingest_benchmark.yml
vendored
Normal file
@@ -0,0 +1,372 @@
|
||||
name: Benchmarking
|
||||
|
||||
on:
|
||||
# uncomment to run on push for debugging your PR
|
||||
# push:
|
||||
# branches: [ your branch ]
|
||||
schedule:
|
||||
# * is a special character in YAML so you have to quote this string
|
||||
# ┌───────────── minute (0 - 59)
|
||||
# │ ┌───────────── hour (0 - 23)
|
||||
# │ │ ┌───────────── day of the month (1 - 31)
|
||||
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
||||
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||
- cron: '0 9 * * *' # run once a day, timezone is utc
|
||||
workflow_dispatch: # adds ability to run this manually
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
concurrency:
|
||||
# Allow only one workflow globally because we need dedicated resources which only exist once
|
||||
group: ingest-bench-workflow
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
ingest:
|
||||
strategy:
|
||||
matrix:
|
||||
target_project: [new_empty_project, large_existing_project]
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
env:
|
||||
PG_CONFIG: /tmp/neon/pg_install/v16/bin/pg_config
|
||||
PSQL: /tmp/neon/pg_install/v16/bin/psql
|
||||
PG_16_LIB_PATH: /tmp/neon/pg_install/v16/lib
|
||||
PGCOPYDB: /pgcopydb/bin/pgcopydb
|
||||
PGCOPYDB_LIB_PATH: /pgcopydb/lib
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
options: --init
|
||||
timeout-minutes: 1440
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Configure AWS credentials # necessary to download artefacts
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
|
||||
- name: Create Neon Project
|
||||
if: ${{ matrix.target_project == 'new_empty_project' }}
|
||||
id: create-neon-project-ingest-target
|
||||
uses: ./.github/actions/neon-project-create
|
||||
with:
|
||||
region_id: aws-us-east-2
|
||||
postgres_version: 16
|
||||
compute_units: '[7, 7]' # we want to test large compute here to avoid compute-side bottleneck
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
|
||||
- name: Initialize Neon project and retrieve current backpressure seconds
|
||||
if: ${{ matrix.target_project == 'new_empty_project' }}
|
||||
env:
|
||||
NEW_PROJECT_CONNSTR: ${{ steps.create-neon-project-ingest-target.outputs.dsn }}
|
||||
NEW_PROJECT_ID: ${{ steps.create-neon-project-ingest-target.outputs.project_id }}
|
||||
run: |
|
||||
echo "Initializing Neon project with project_id: ${NEW_PROJECT_ID}"
|
||||
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
|
||||
${PSQL} "${NEW_PROJECT_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
|
||||
BACKPRESSURE_TIME_BEFORE_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;")
|
||||
echo "BACKPRESSURE_TIME_BEFORE_INGEST=${BACKPRESSURE_TIME_BEFORE_INGEST}" >> $GITHUB_ENV
|
||||
echo "NEW_PROJECT_CONNSTR=${NEW_PROJECT_CONNSTR}" >> $GITHUB_ENV
|
||||
|
||||
- name: Create Neon Branch for large tenant
|
||||
if: ${{ matrix.target_project == 'large_existing_project' }}
|
||||
id: create-neon-branch-ingest-target
|
||||
uses: ./.github/actions/neon-branch-create
|
||||
with:
|
||||
project_id: ${{ vars.BENCHMARK_INGEST_TARGET_PROJECTID }}
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
|
||||
- name: Initialize Neon project and retrieve current backpressure seconds
|
||||
if: ${{ matrix.target_project == 'large_existing_project' }}
|
||||
env:
|
||||
NEW_PROJECT_CONNSTR: ${{ steps.create-neon-branch-ingest-target.outputs.dsn }}
|
||||
NEW_BRANCH_ID: ${{ steps.create-neon-branch-ingest-target.outputs.branch_id }}
|
||||
run: |
|
||||
echo "Initializing Neon branch with branch_id: ${NEW_BRANCH_ID}"
|
||||
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
|
||||
# Extract the part before the database name
|
||||
base_connstr="${NEW_PROJECT_CONNSTR%/*}"
|
||||
# Extract the query parameters (if any) after the database name
|
||||
query_params="${NEW_PROJECT_CONNSTR#*\?}"
|
||||
# Reconstruct the new connection string
|
||||
if [ "$query_params" != "$NEW_PROJECT_CONNSTR" ]; then
|
||||
new_connstr="${base_connstr}/neondb?${query_params}"
|
||||
else
|
||||
new_connstr="${base_connstr}/neondb"
|
||||
fi
|
||||
${PSQL} "${new_connstr}" -c "drop database ludicrous;"
|
||||
${PSQL} "${new_connstr}" -c "CREATE DATABASE ludicrous;"
|
||||
if [ "$query_params" != "$NEW_PROJECT_CONNSTR" ]; then
|
||||
NEW_PROJECT_CONNSTR="${base_connstr}/ludicrous?${query_params}"
|
||||
else
|
||||
NEW_PROJECT_CONNSTR="${base_connstr}/ludicrous"
|
||||
fi
|
||||
${PSQL} "${NEW_PROJECT_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
|
||||
BACKPRESSURE_TIME_BEFORE_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;")
|
||||
echo "BACKPRESSURE_TIME_BEFORE_INGEST=${BACKPRESSURE_TIME_BEFORE_INGEST}" >> $GITHUB_ENV
|
||||
echo "NEW_PROJECT_CONNSTR=${NEW_PROJECT_CONNSTR}" >> $GITHUB_ENV
|
||||
|
||||
|
||||
- name: Create pgcopydb filter file
|
||||
run: |
|
||||
cat << EOF > /tmp/pgcopydb_filter.txt
|
||||
[include-only-table]
|
||||
public.events
|
||||
public.emails
|
||||
public.email_transmissions
|
||||
public.payments
|
||||
public.editions
|
||||
public.edition_modules
|
||||
public.sp_content
|
||||
public.email_broadcasts
|
||||
public.user_collections
|
||||
public.devices
|
||||
public.user_accounts
|
||||
public.lessons
|
||||
public.lesson_users
|
||||
public.payment_methods
|
||||
public.orders
|
||||
public.course_emails
|
||||
public.modules
|
||||
public.users
|
||||
public.module_users
|
||||
public.courses
|
||||
public.payment_gateway_keys
|
||||
public.accounts
|
||||
public.roles
|
||||
public.payment_gateways
|
||||
public.management
|
||||
public.event_names
|
||||
EOF
|
||||
|
||||
- name: Invoke pgcopydb
|
||||
env:
|
||||
BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }}
|
||||
run: |
|
||||
export LD_LIBRARY_PATH=${PGCOPYDB_LIB_PATH}:${PG_16_LIB_PATH}
|
||||
export PGCOPYDB_SOURCE_PGURI="${BENCHMARK_INGEST_SOURCE_CONNSTR}"
|
||||
export PGCOPYDB_TARGET_PGURI="${NEW_PROJECT_CONNSTR}"
|
||||
export PGOPTIONS="-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7"
|
||||
${PG_CONFIG} --bindir
|
||||
${PGCOPYDB} --version
|
||||
${PGCOPYDB} clone --skip-vacuum --no-owner --no-acl --skip-db-properties --table-jobs 4 \
|
||||
--index-jobs 4 --restore-jobs 4 --split-tables-larger-than 10GB --skip-extensions \
|
||||
--use-copy-binary --filters /tmp/pgcopydb_filter.txt 2>&1 | tee /tmp/pgcopydb_${{ matrix.target_project }}.log
|
||||
|
||||
# create dummy pgcopydb log to test parsing
|
||||
# - name: create dummy log for parser test
|
||||
# run: |
|
||||
# cat << EOF > /tmp/pgcopydb_${{ matrix.target_project }}.log
|
||||
# 2024-11-04 18:00:53.433 500861 INFO main.c:136 Running pgcopydb version 0.17.10.g8361a93 from "/usr/lib/postgresql/17/bin/pgcopydb"
|
||||
# 2024-11-04 18:00:53.434 500861 INFO cli_common.c:1225 [SOURCE] Copying database from "postgres://neondb_owner@ep-bitter-shape-w2c1ir0a.us-east-2.aws.neon.build/neondb?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60"
|
||||
# 2024-11-04 18:00:53.434 500861 INFO cli_common.c:1226 [TARGET] Copying database into "postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60"
|
||||
# 2024-11-04 18:00:53.442 500861 INFO copydb.c:105 Using work dir "/tmp/pgcopydb"
|
||||
# 2024-11-04 18:00:53.541 500861 INFO snapshot.c:107 Exported snapshot "00000008-00000033-1" from the source database
|
||||
# 2024-11-04 18:00:53.556 500865 INFO cli_clone_follow.c:543 STEP 1: fetch source database tables, indexes, and sequences
|
||||
# 2024-11-04 18:00:54.570 500865 INFO copydb_schema.c:716 Splitting source candidate tables larger than 10 GB
|
||||
# 2024-11-04 18:00:54.570 500865 INFO copydb_schema.c:829 Table public.events is 96 GB large which is larger than --split-tables-larger-than 10 GB, and does not have a unique column of type integer: splitting by CTID
|
||||
# 2024-11-04 18:01:05.538 500865 INFO copydb_schema.c:905 Table public.events is 96 GB large, 10 COPY processes will be used, partitioning on ctid.
|
||||
# 2024-11-04 18:01:05.564 500865 INFO copydb_schema.c:905 Table public.email_transmissions is 27 GB large, 4 COPY processes will be used, partitioning on id.
|
||||
# 2024-11-04 18:01:05.584 500865 INFO copydb_schema.c:905 Table public.lessons is 25 GB large, 4 COPY processes will be used, partitioning on id.
|
||||
# 2024-11-04 18:01:05.605 500865 INFO copydb_schema.c:905 Table public.lesson_users is 16 GB large, 3 COPY processes will be used, partitioning on id.
|
||||
# 2024-11-04 18:01:05.605 500865 INFO copydb_schema.c:761 Fetched information for 26 tables (including 4 tables split in 21 partitions total), with an estimated total of 907 million tuples and 175 GB on-disk
|
||||
# 2024-11-04 18:01:05.687 500865 INFO copydb_schema.c:968 Fetched information for 57 indexes (supporting 25 constraints)
|
||||
# 2024-11-04 18:01:05.753 500865 INFO sequences.c:78 Fetching information for 24 sequences
|
||||
# 2024-11-04 18:01:05.903 500865 INFO copydb_schema.c:1122 Fetched information for 4 extensions
|
||||
# 2024-11-04 18:01:06.178 500865 INFO copydb_schema.c:1538 Found 0 indexes (supporting 0 constraints) in the target database
|
||||
# 2024-11-04 18:01:06.184 500865 INFO cli_clone_follow.c:584 STEP 2: dump the source database schema (pre/post data)
|
||||
# 2024-11-04 18:01:06.186 500865 INFO pgcmd.c:468 /usr/lib/postgresql/16/bin/pg_dump -Fc --snapshot 00000008-00000033-1 --section=pre-data --section=post-data --file /tmp/pgcopydb/schema/schema.dump 'postgres://neondb_owner@ep-bitter-shape-w2c1ir0a.us-east-2.aws.neon.build/neondb?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60'
|
||||
# 2024-11-04 18:01:06.952 500865 INFO cli_clone_follow.c:592 STEP 3: restore the pre-data section to the target database
|
||||
# 2024-11-04 18:01:07.004 500865 INFO pgcmd.c:1001 /usr/lib/postgresql/16/bin/pg_restore --dbname 'postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' --section pre-data --jobs 4 --no-owner --no-acl --use-list /tmp/pgcopydb/schema/pre-filtered.list /tmp/pgcopydb/schema/schema.dump
|
||||
# 2024-11-04 18:01:07.438 500874 INFO table-data.c:656 STEP 4: starting 4 table-data COPY processes
|
||||
# 2024-11-04 18:01:07.451 500877 INFO vacuum.c:139 STEP 8: skipping VACUUM jobs per --skip-vacuum
|
||||
# 2024-11-04 18:01:07.457 500875 INFO indexes.c:182 STEP 6: starting 4 CREATE INDEX processes
|
||||
# 2024-11-04 18:01:07.457 500875 INFO indexes.c:183 STEP 7: constraints are built by the CREATE INDEX processes
|
||||
# 2024-11-04 18:01:07.507 500865 INFO blobs.c:74 Skipping large objects: none found.
|
||||
# 2024-11-04 18:01:07.509 500865 INFO sequences.c:194 STEP 9: reset sequences values
|
||||
# 2024-11-04 18:01:07.510 500886 INFO sequences.c:290 Set sequences values on the target database
|
||||
# 2024-11-04 20:49:00.587 500865 INFO cli_clone_follow.c:608 STEP 10: restore the post-data section to the target database
|
||||
# 2024-11-04 20:49:00.600 500865 INFO pgcmd.c:1001 /usr/lib/postgresql/16/bin/pg_restore --dbname 'postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' --section post-data --jobs 4 --no-owner --no-acl --use-list /tmp/pgcopydb/schema/post-filtered.list /tmp/pgcopydb/schema/schema.dump
|
||||
# 2024-11-05 10:50:58.508 500865 INFO cli_clone_follow.c:639 All step are now done, 16h49m elapsed
|
||||
# 2024-11-05 10:50:58.508 500865 INFO summary.c:3155 Printing summary for 26 tables and 57 indexes
|
||||
|
||||
# OID | Schema | Name | Parts | copy duration | transmitted bytes | indexes | create index duration
|
||||
# ------+--------+----------------------+-------+---------------+-------------------+---------+----------------------
|
||||
# 24654 | public | events | 10 | 1d11h | 878 GB | 1 | 1h41m
|
||||
# 24623 | public | email_transmissions | 4 | 4h46m | 99 GB | 3 | 2h04m
|
||||
# 24665 | public | lessons | 4 | 4h42m | 161 GB | 4 | 1m11s
|
||||
# 24661 | public | lesson_users | 3 | 2h46m | 49 GB | 3 | 39m35s
|
||||
# 24631 | public | emails | 1 | 34m07s | 10 GB | 2 | 17s
|
||||
# 24739 | public | payments | 1 | 5m47s | 1848 MB | 4 | 4m40s
|
||||
# 24681 | public | module_users | 1 | 4m57s | 1610 MB | 3 | 1m50s
|
||||
# 24694 | public | orders | 1 | 2m50s | 835 MB | 3 | 1m05s
|
||||
# 24597 | public | devices | 1 | 1m45s | 498 MB | 2 | 40s
|
||||
# 24723 | public | payment_methods | 1 | 1m24s | 548 MB | 2 | 31s
|
||||
# 24765 | public | user_collections | 1 | 2m17s | 1005 MB | 2 | 968ms
|
||||
# 24774 | public | users | 1 | 52s | 291 MB | 4 | 27s
|
||||
# 24760 | public | user_accounts | 1 | 16s | 172 MB | 3 | 16s
|
||||
# 24606 | public | edition_modules | 1 | 8s983 | 46 MB | 3 | 4s749
|
||||
# 24583 | public | course_emails | 1 | 8s526 | 26 MB | 2 | 996ms
|
||||
# 24685 | public | modules | 1 | 1s592 | 21 MB | 3 | 1s696
|
||||
# 24610 | public | editions | 1 | 2s199 | 7483 kB | 2 | 1s032
|
||||
# 24755 | public | sp_content | 1 | 1s555 | 4177 kB | 0 | 0ms
|
||||
# 24619 | public | email_broadcasts | 1 | 744ms | 2645 kB | 2 | 677ms
|
||||
# 24590 | public | courses | 1 | 387ms | 1540 kB | 2 | 367ms
|
||||
# 24704 | public | payment_gateway_keys | 1 | 1s972 | 164 kB | 2 | 27ms
|
||||
# 24576 | public | accounts | 1 | 58ms | 24 kB | 1 | 14ms
|
||||
# 24647 | public | event_names | 1 | 32ms | 397 B | 1 | 8ms
|
||||
# 24716 | public | payment_gateways | 1 | 1s675 | 117 B | 1 | 11ms
|
||||
# 24748 | public | roles | 1 | 71ms | 173 B | 1 | 8ms
|
||||
# 24676 | public | management | 1 | 33ms | 40 B | 1 | 19ms
|
||||
|
||||
|
||||
# Step Connection Duration Transfer Concurrency
|
||||
# -------------------------------------------------- ---------- ---------- ---------- ------------
|
||||
# Catalog Queries (table ordering, filtering, etc) source 12s 1
|
||||
# Dump Schema source 765ms 1
|
||||
# Prepare Schema target 466ms 1
|
||||
# COPY, INDEX, CONSTRAINTS, VACUUM (wall clock) both 2h47m 12
|
||||
# COPY (cumulative) both 7h46m 1225 GB 4
|
||||
# CREATE INDEX (cumulative) target 4h36m 4
|
||||
# CONSTRAINTS (cumulative) target 8s493 4
|
||||
# VACUUM (cumulative) target 0ms 4
|
||||
# Reset Sequences both 60ms 1
|
||||
# Large Objects (cumulative) (null) 0ms 0
|
||||
# Finalize Schema both 14h01m 4
|
||||
# -------------------------------------------------- ---------- ---------- ---------- ------------
|
||||
# Total Wall Clock Duration both 16h49m 20
|
||||
|
||||
|
||||
# EOF
|
||||
|
||||
|
||||
- name: show tables sizes and retrieve current backpressure seconds
|
||||
run: |
|
||||
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
|
||||
${PSQL} "${NEW_PROJECT_CONNSTR}" -c "\dt+"
|
||||
BACKPRESSURE_TIME_AFTER_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;")
|
||||
echo "BACKPRESSURE_TIME_AFTER_INGEST=${BACKPRESSURE_TIME_AFTER_INGEST}" >> $GITHUB_ENV
|
||||
|
||||
- name: Parse pgcopydb log and report performance metrics
|
||||
env:
|
||||
PERF_TEST_RESULT_CONNSTR: ${{ secrets.PERF_TEST_RESULT_CONNSTR }}
|
||||
run: |
|
||||
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
|
||||
|
||||
# Define the log file path
|
||||
LOG_FILE="/tmp/pgcopydb_${{ matrix.target_project }}.log"
|
||||
|
||||
# Get the current git commit hash
|
||||
git config --global --add safe.directory /__w/neon/neon
|
||||
COMMIT_HASH=$(git rev-parse --short HEAD)
|
||||
|
||||
# Define the platform and test suite
|
||||
PLATFORM="pg16-${{ matrix.target_project }}-us-east-2-staging"
|
||||
SUIT="pgcopydb_ingest_bench"
|
||||
|
||||
# Function to convert time (e.g., "2h47m", "4h36m", "118ms", "8s493") to seconds
|
||||
convert_to_seconds() {
|
||||
local duration=$1
|
||||
local total_seconds=0
|
||||
|
||||
# Check for hours (h)
|
||||
if [[ "$duration" =~ ([0-9]+)h ]]; then
|
||||
total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} * 3600))
|
||||
fi
|
||||
|
||||
# Check for seconds (s)
|
||||
if [[ "$duration" =~ ([0-9]+)s ]]; then
|
||||
total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0}))
|
||||
fi
|
||||
|
||||
# Check for milliseconds (ms) (if applicable)
|
||||
if [[ "$duration" =~ ([0-9]+)ms ]]; then
|
||||
total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} / 1000))
|
||||
duration=${duration/${BASH_REMATCH[0]}/} # need to remove it to avoid double counting with m
|
||||
fi
|
||||
|
||||
# Check for minutes (m) - must be checked after ms because m is contained in ms
|
||||
if [[ "$duration" =~ ([0-9]+)m ]]; then
|
||||
total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} * 60))
|
||||
fi
|
||||
|
||||
echo $total_seconds
|
||||
}
|
||||
|
||||
# Calculate the backpressure difference in seconds
|
||||
BACKPRESSURE_TIME_DIFF=$(awk "BEGIN {print $BACKPRESSURE_TIME_AFTER_INGEST - $BACKPRESSURE_TIME_BEFORE_INGEST}")
|
||||
|
||||
# Insert the backpressure time difference into the performance database
|
||||
if [ -n "$BACKPRESSURE_TIME_DIFF" ]; then
|
||||
PSQL_CMD="${PSQL} \"${PERF_TEST_RESULT_CONNSTR}\" -c \"
|
||||
INSERT INTO public.perf_test_results (suit, revision, platform, metric_name, metric_value, metric_unit, metric_report_type, recorded_at_timestamp)
|
||||
VALUES ('${SUIT}', '${COMMIT_HASH}', '${PLATFORM}', 'backpressure_time', ${BACKPRESSURE_TIME_DIFF}, 'seconds', 'lower_is_better', now());
|
||||
\""
|
||||
echo "Inserting backpressure time difference: ${BACKPRESSURE_TIME_DIFF} seconds"
|
||||
eval $PSQL_CMD
|
||||
fi
|
||||
|
||||
# Extract and process log lines
|
||||
while IFS= read -r line; do
|
||||
METRIC_NAME=""
|
||||
# Match each desired line and extract the relevant information
|
||||
if [[ "$line" =~ COPY,\ INDEX,\ CONSTRAINTS,\ VACUUM.* ]]; then
|
||||
METRIC_NAME="COPY, INDEX, CONSTRAINTS, VACUUM (wall clock)"
|
||||
elif [[ "$line" =~ COPY\ \(cumulative\).* ]]; then
|
||||
METRIC_NAME="COPY (cumulative)"
|
||||
elif [[ "$line" =~ CREATE\ INDEX\ \(cumulative\).* ]]; then
|
||||
METRIC_NAME="CREATE INDEX (cumulative)"
|
||||
elif [[ "$line" =~ CONSTRAINTS\ \(cumulative\).* ]]; then
|
||||
METRIC_NAME="CONSTRAINTS (cumulative)"
|
||||
elif [[ "$line" =~ Finalize\ Schema.* ]]; then
|
||||
METRIC_NAME="Finalize Schema"
|
||||
elif [[ "$line" =~ Total\ Wall\ Clock\ Duration.* ]]; then
|
||||
METRIC_NAME="Total Wall Clock Duration"
|
||||
fi
|
||||
|
||||
# If a metric was matched, insert it into the performance database
|
||||
if [ -n "$METRIC_NAME" ]; then
|
||||
DURATION=$(echo "$line" | grep -oP '\d+h\d+m|\d+s|\d+ms|\d{1,2}h\d{1,2}m|\d+\.\d+s' | head -n 1)
|
||||
METRIC_VALUE=$(convert_to_seconds "$DURATION")
|
||||
PSQL_CMD="${PSQL} \"${PERF_TEST_RESULT_CONNSTR}\" -c \"
|
||||
INSERT INTO public.perf_test_results (suit, revision, platform, metric_name, metric_value, metric_unit, metric_report_type, recorded_at_timestamp)
|
||||
VALUES ('${SUIT}', '${COMMIT_HASH}', '${PLATFORM}', '${METRIC_NAME}', ${METRIC_VALUE}, 'seconds', 'lower_is_better', now());
|
||||
\""
|
||||
echo "Inserting ${METRIC_NAME} with value ${METRIC_VALUE} seconds"
|
||||
eval $PSQL_CMD
|
||||
fi
|
||||
done < "$LOG_FILE"
|
||||
|
||||
- name: Delete Neon Project
|
||||
if: ${{ always() && matrix.target_project == 'new_empty_project' }}
|
||||
uses: ./.github/actions/neon-project-delete
|
||||
with:
|
||||
project_id: ${{ steps.create-neon-project-ingest-target.outputs.project_id }}
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
|
||||
- name: Delete Neon Branch for large tenant
|
||||
if: ${{ always() && matrix.target_project == 'large_existing_project' }}
|
||||
uses: ./.github/actions/neon-branch-delete
|
||||
with:
|
||||
project_id: ${{ vars.BENCHMARK_INGEST_TARGET_PROJECTID }}
|
||||
branch_id: ${{ steps.create-neon-branch-ingest-target.outputs.branch_id }}
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
29
.github/workflows/report-workflow-stats-batch.yml
vendored
Normal file
29
.github/workflows/report-workflow-stats-batch.yml
vendored
Normal file
@@ -0,0 +1,29 @@
|
||||
name: Report Workflow Stats Batch
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: '*/15 * * * *'
|
||||
- cron: '25 0 * * *'
|
||||
|
||||
jobs:
|
||||
gh-workflow-stats-batch:
|
||||
name: GitHub Workflow Stats Batch
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
actions: read
|
||||
steps:
|
||||
- name: Export Workflow Run for the past 2 hours
|
||||
uses: neondatabase/gh-workflow-stats-action@v0.2.1
|
||||
with:
|
||||
db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
|
||||
db_table: "gh_workflow_stats_batch_neon"
|
||||
gh_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
duration: '2h'
|
||||
- name: Export Workflow Run for the past 24 hours
|
||||
if: github.event.schedule == '25 0 * * *'
|
||||
uses: neondatabase/gh-workflow-stats-action@v0.2.1
|
||||
with:
|
||||
db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
|
||||
db_table: "gh_workflow_stats_batch_neon"
|
||||
gh_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
duration: '24h'
|
||||
7
Cargo.lock
generated
7
Cargo.lock
generated
@@ -994,9 +994,9 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
|
||||
|
||||
[[package]]
|
||||
name = "bytes"
|
||||
version = "1.5.0"
|
||||
version = "1.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223"
|
||||
checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
@@ -1229,12 +1229,15 @@ dependencies = [
|
||||
"flate2",
|
||||
"futures",
|
||||
"hyper 0.14.30",
|
||||
"metrics",
|
||||
"nix 0.27.1",
|
||||
"notify",
|
||||
"num_cpus",
|
||||
"once_cell",
|
||||
"opentelemetry",
|
||||
"opentelemetry_sdk",
|
||||
"postgres",
|
||||
"prometheus",
|
||||
"regex",
|
||||
"remote_storage",
|
||||
"reqwest 0.12.4",
|
||||
|
||||
@@ -624,16 +624,12 @@ FROM build-deps AS pg-cron-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# 1.6.4 available, supports v17
|
||||
# This is an experimental extension that we do not support on prod yet.
|
||||
# !Do not remove!
|
||||
# We set it in shared_preload_libraries and computes will fail to start if library is not found.
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \
|
||||
echo "383a627867d730222c272bfd25cd5e151c578d73f696d32910c7db8c665cc7db pg_cron.tar.gz" | sha256sum --check && \
|
||||
RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.4.tar.gz -O pg_cron.tar.gz && \
|
||||
echo "52d1850ee7beb85a4cb7185731ef4e5a90d1de216709d8988324b0d02e76af61 pg_cron.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_cron-src && cd pg_cron-src && tar xzf ../pg_cron.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
@@ -1475,6 +1471,8 @@ RUN mkdir -p /etc/local_proxy && chown postgres:postgres /etc/local_proxy
|
||||
COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter
|
||||
COPY --from=sql-exporter /bin/sql_exporter /bin/sql_exporter
|
||||
|
||||
COPY --chown=postgres compute/etc/postgres_exporter.yml /etc/postgres_exporter.yml
|
||||
|
||||
COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/sql_exporter.yml /etc/sql_exporter.yml
|
||||
COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/neon_collector.yml /etc/neon_collector.yml
|
||||
COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/sql_exporter_autoscaling.yml /etc/sql_exporter_autoscaling.yml
|
||||
|
||||
0
compute/etc/postgres_exporter.yml
Normal file
0
compute/etc/postgres_exporter.yml
Normal file
@@ -1 +1 @@
|
||||
SELECT neon.backpressure_throttling_time()::float8 / 1000 AS throttled;
|
||||
SELECT (neon.backpressure_throttling_time()::float8 / 1000000) AS throttled;
|
||||
|
||||
@@ -26,7 +26,7 @@ commands:
|
||||
- name: postgres-exporter
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter'
|
||||
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter --config.file=/etc/postgres_exporter.yml'
|
||||
- name: sql-exporter
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
|
||||
@@ -26,7 +26,7 @@ commands:
|
||||
- name: postgres-exporter
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter'
|
||||
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter --config.file=/etc/postgres_exporter.yml'
|
||||
- name: sql-exporter
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
|
||||
@@ -18,9 +18,11 @@ clap.workspace = true
|
||||
flate2.workspace = true
|
||||
futures.workspace = true
|
||||
hyper0 = { workspace = true, features = ["full"] }
|
||||
metrics.workspace = true
|
||||
nix.workspace = true
|
||||
notify.workspace = true
|
||||
num_cpus.workspace = true
|
||||
once_cell.workspace = true
|
||||
opentelemetry.workspace = true
|
||||
opentelemetry_sdk.workspace = true
|
||||
postgres.workspace = true
|
||||
@@ -39,6 +41,7 @@ tracing-subscriber.workspace = true
|
||||
tracing-utils.workspace = true
|
||||
thiserror.workspace = true
|
||||
url.workspace = true
|
||||
prometheus.workspace = true
|
||||
|
||||
compute_api.workspace = true
|
||||
utils.workspace = true
|
||||
|
||||
@@ -74,10 +74,17 @@ pub fn write_postgres_conf(
|
||||
}
|
||||
|
||||
// Locales
|
||||
writeln!(file, "lc_messages='C.UTF-8'")?;
|
||||
writeln!(file, "lc_monetary='C.UTF-8'")?;
|
||||
writeln!(file, "lc_time='C.UTF-8'")?;
|
||||
writeln!(file, "lc_numeric='C.UTF-8'")?;
|
||||
if cfg!(target_os = "macos") {
|
||||
writeln!(file, "lc_messages='C'")?;
|
||||
writeln!(file, "lc_monetary='C'")?;
|
||||
writeln!(file, "lc_time='C'")?;
|
||||
writeln!(file, "lc_numeric='C'")?;
|
||||
} else {
|
||||
writeln!(file, "lc_messages='C.UTF-8'")?;
|
||||
writeln!(file, "lc_monetary='C.UTF-8'")?;
|
||||
writeln!(file, "lc_time='C.UTF-8'")?;
|
||||
writeln!(file, "lc_numeric='C.UTF-8'")?;
|
||||
}
|
||||
|
||||
match spec.mode {
|
||||
ComputeMode::Primary => {}
|
||||
|
||||
@@ -9,6 +9,7 @@ use crate::catalog::SchemaDumpError;
|
||||
use crate::catalog::{get_database_schema, get_dbs_and_roles};
|
||||
use crate::compute::forward_termination_signal;
|
||||
use crate::compute::{ComputeNode, ComputeState, ParsedSpec};
|
||||
use crate::installed_extensions;
|
||||
use compute_api::requests::{ConfigurationRequest, ExtensionInstallRequest, SetRoleGrantsRequest};
|
||||
use compute_api::responses::{
|
||||
ComputeStatus, ComputeStatusResponse, ExtensionInstallResult, GenericAPIError,
|
||||
@@ -19,6 +20,8 @@ use anyhow::Result;
|
||||
use hyper::header::CONTENT_TYPE;
|
||||
use hyper::service::{make_service_fn, service_fn};
|
||||
use hyper::{Body, Method, Request, Response, Server, StatusCode};
|
||||
use metrics::Encoder;
|
||||
use metrics::TextEncoder;
|
||||
use tokio::task;
|
||||
use tracing::{debug, error, info, warn};
|
||||
use tracing_utils::http::OtelName;
|
||||
@@ -65,6 +68,28 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
|
||||
Response::new(Body::from(serde_json::to_string(&metrics).unwrap()))
|
||||
}
|
||||
|
||||
// Prometheus metrics
|
||||
(&Method::GET, "/metrics") => {
|
||||
debug!("serving /metrics GET request");
|
||||
|
||||
let mut buffer = vec![];
|
||||
let metrics = installed_extensions::collect();
|
||||
let encoder = TextEncoder::new();
|
||||
encoder.encode(&metrics, &mut buffer).unwrap();
|
||||
|
||||
match Response::builder()
|
||||
.status(StatusCode::OK)
|
||||
.header(CONTENT_TYPE, encoder.format_type())
|
||||
.body(Body::from(buffer))
|
||||
{
|
||||
Ok(response) => response,
|
||||
Err(err) => {
|
||||
let msg = format!("error handling /metrics request: {err}");
|
||||
error!(msg);
|
||||
render_json_error(&msg, StatusCode::INTERNAL_SERVER_ERROR)
|
||||
}
|
||||
}
|
||||
}
|
||||
// Collect Postgres current usage insights
|
||||
(&Method::GET, "/insights") => {
|
||||
info!("serving /insights GET request");
|
||||
|
||||
@@ -37,6 +37,21 @@ paths:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ComputeMetrics"
|
||||
|
||||
/metrics
|
||||
get:
|
||||
tags:
|
||||
- Info
|
||||
summary: Get compute node metrics in text format.
|
||||
description: ""
|
||||
operationId: getComputeMetrics
|
||||
responses:
|
||||
200:
|
||||
description: ComputeMetrics
|
||||
content:
|
||||
text/plain:
|
||||
schema:
|
||||
type: string
|
||||
description: Metrics in text format.
|
||||
/insights:
|
||||
get:
|
||||
tags:
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use compute_api::responses::{InstalledExtension, InstalledExtensions};
|
||||
use metrics::proto::MetricFamily;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
use tracing::info;
|
||||
@@ -8,6 +9,10 @@ use anyhow::Result;
|
||||
use postgres::{Client, NoTls};
|
||||
use tokio::task;
|
||||
|
||||
use metrics::core::Collector;
|
||||
use metrics::{register_uint_gauge_vec, UIntGaugeVec};
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
/// We don't reuse get_existing_dbs() just for code clarity
|
||||
/// and to make database listing query here more explicit.
|
||||
///
|
||||
@@ -59,6 +64,12 @@ pub async fn get_installed_extensions(connstr: Url) -> Result<InstalledExtension
|
||||
|
||||
for (extname, v) in extensions.iter() {
|
||||
let version = v.to_string();
|
||||
|
||||
// increment the number of databases where the version of extension is installed
|
||||
INSTALLED_EXTENSIONS
|
||||
.with_label_values(&[extname, &version])
|
||||
.inc();
|
||||
|
||||
extensions_map
|
||||
.entry(extname.to_string())
|
||||
.and_modify(|e| {
|
||||
@@ -74,9 +85,11 @@ pub async fn get_installed_extensions(connstr: Url) -> Result<InstalledExtension
|
||||
}
|
||||
}
|
||||
|
||||
Ok(InstalledExtensions {
|
||||
let res = InstalledExtensions {
|
||||
extensions: extensions_map.values().cloned().collect(),
|
||||
})
|
||||
};
|
||||
|
||||
Ok(res)
|
||||
})
|
||||
.await?
|
||||
}
|
||||
@@ -97,6 +110,18 @@ pub fn get_installed_extensions_sync(connstr: Url) -> Result<()> {
|
||||
"[NEON_EXT_STAT] {}",
|
||||
serde_json::to_string(&result).expect("failed to serialize extensions list")
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||
register_uint_gauge_vec!(
|
||||
"installed_extensions",
|
||||
"Number of databases where the version of extension is installed",
|
||||
&["extension_name", "version"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub fn collect() -> Vec<MetricFamily> {
|
||||
INSTALLED_EXTENSIONS.collect()
|
||||
}
|
||||
|
||||
@@ -277,7 +277,11 @@ pub mod defaults {
|
||||
pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s";
|
||||
|
||||
pub const DEFAULT_SUPERUSER: &str = "cloud_admin";
|
||||
pub const DEFAULT_LOCALE: &str = "C.UTF-8";
|
||||
pub const DEFAULT_LOCALE: &str = if cfg!(target_os = "macos") {
|
||||
"C"
|
||||
} else {
|
||||
"C.UTF-8"
|
||||
};
|
||||
|
||||
pub const DEFAULT_PAGE_CACHE_SIZE: usize = 8192;
|
||||
pub const DEFAULT_MAX_FILE_DESCRIPTORS: usize = 100;
|
||||
|
||||
@@ -80,18 +80,18 @@ impl NeonWalRecord {
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
pub fn wal_clear() -> Self {
|
||||
pub fn wal_clear(s: impl AsRef<str>) -> Self {
|
||||
Self::Test {
|
||||
append: "".to_string(),
|
||||
append: s.as_ref().to_string(),
|
||||
clear: true,
|
||||
will_init: false,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
pub fn wal_init() -> Self {
|
||||
pub fn wal_init(s: impl AsRef<str>) -> Self {
|
||||
Self::Test {
|
||||
append: "".to_string(),
|
||||
append: s.as_ref().to_string(),
|
||||
clear: true,
|
||||
will_init: true,
|
||||
}
|
||||
|
||||
@@ -123,15 +123,27 @@ pub async fn fsync_async_opt(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Like postgres' durable_rename, renames file issuing fsyncs do make it
|
||||
/// durable. After return, file and rename are guaranteed to be persisted.
|
||||
/// Like postgres' durable_rename, renames a file and issues fsyncs to make it durable. After
|
||||
/// returning, both the file and rename are guaranteed to be persisted. Both paths must be on the
|
||||
/// same file system.
|
||||
///
|
||||
/// Unlike postgres, it only does fsyncs to 1) file to be renamed to make
|
||||
/// contents durable; 2) its directory entry to make rename durable 3) again to
|
||||
/// already renamed file, which is not required by standards but postgres does
|
||||
/// it, let's stick to that. Postgres additionally fsyncs newpath *before*
|
||||
/// rename if it exists to ensure that at least one of the files survives, but
|
||||
/// current callers don't need that.
|
||||
/// Unlike postgres, it only fsyncs 1) the file to make contents durable, and 2) the directory to
|
||||
/// make the rename durable. This sequence ensures the target file will never be incomplete.
|
||||
///
|
||||
/// Postgres also:
|
||||
///
|
||||
/// * Fsyncs the target file, if it exists, before the rename, to ensure either the new or existing
|
||||
/// file survives a crash. Current callers don't need this as it should already be fsynced if
|
||||
/// durability is needed.
|
||||
///
|
||||
/// * Fsyncs the file after the rename. This can be required with certain OSes or file systems (e.g.
|
||||
/// NFS), but not on Linux with most common file systems like ext4 (which we currently use).
|
||||
///
|
||||
/// An audit of 8 other databases found that none fsynced the file after a rename:
|
||||
/// <https://github.com/neondatabase/neon/pull/9686#discussion_r1837180535>
|
||||
///
|
||||
/// eBPF probes confirmed that this is sufficient with ext4, XFS, and ZFS, but possibly not Btrfs:
|
||||
/// <https://github.com/neondatabase/neon/pull/9686#discussion_r1837926218>
|
||||
///
|
||||
/// virtual_file.rs has similar code, but it doesn't use vfs.
|
||||
///
|
||||
@@ -149,9 +161,6 @@ pub async fn durable_rename(
|
||||
// Time to do the real deal.
|
||||
tokio::fs::rename(old_path.as_ref(), new_path.as_ref()).await?;
|
||||
|
||||
// Postgres'ish fsync of renamed file.
|
||||
fsync_async_opt(new_path.as_ref(), do_fsync).await?;
|
||||
|
||||
// Now fsync the parent
|
||||
let parent = match new_path.as_ref().parent() {
|
||||
Some(p) => p,
|
||||
|
||||
@@ -138,6 +138,11 @@ impl Lsn {
|
||||
self.0.checked_sub(other).map(Lsn)
|
||||
}
|
||||
|
||||
/// Subtract a number, saturating at numeric bounds instead of overflowing.
|
||||
pub fn saturating_sub<T: Into<u64>>(self, other: T) -> Lsn {
|
||||
Lsn(self.0.saturating_sub(other.into()))
|
||||
}
|
||||
|
||||
/// Subtract a number, returning the difference as i128 to avoid overflow.
|
||||
pub fn widening_sub<T: Into<u64>>(self, other: T) -> i128 {
|
||||
let other: u64 = other.into();
|
||||
|
||||
@@ -35,6 +35,15 @@ pub fn overlaps_with<T: Ord>(a: &Range<T>, b: &Range<T>) -> bool {
|
||||
!(a.end <= b.start || b.end <= a.start)
|
||||
}
|
||||
|
||||
/// Whether a fully contains b, example as below
|
||||
/// ```plain
|
||||
/// | a |
|
||||
/// | b |
|
||||
/// ```
|
||||
pub fn fully_contains<T: Ord>(a: &Range<T>, b: &Range<T>) -> bool {
|
||||
a.start <= b.start && a.end >= b.end
|
||||
}
|
||||
|
||||
pub fn union_to_keyspace<K: Ord>(a: &mut CompactionKeySpace<K>, b: CompactionKeySpace<K>) {
|
||||
let x = std::mem::take(a);
|
||||
let mut all_ranges_iter = [x.into_iter(), b.into_iter()]
|
||||
|
||||
@@ -4786,12 +4786,6 @@ async fn run_initdb(
|
||||
.args(["--username", &conf.superuser])
|
||||
.args(["--encoding", "utf8"])
|
||||
.args(["--locale", &conf.locale])
|
||||
.args(["--lc-collate", &conf.locale])
|
||||
.args(["--lc-ctype", &conf.locale])
|
||||
.args(["--lc-messages", &conf.locale])
|
||||
.args(["--lc-monetary", &conf.locale])
|
||||
.args(["--lc-numeric", &conf.locale])
|
||||
.args(["--lc-time", &conf.locale])
|
||||
.arg("--no-instructions")
|
||||
.arg("--no-sync")
|
||||
.env_clear()
|
||||
@@ -7763,13 +7757,13 @@ mod tests {
|
||||
(
|
||||
get_key(3),
|
||||
Lsn(0x20),
|
||||
Value::WalRecord(NeonWalRecord::wal_clear()),
|
||||
Value::WalRecord(NeonWalRecord::wal_clear("c")),
|
||||
),
|
||||
(get_key(4), Lsn(0x10), Value::Image("0x10".into())),
|
||||
(
|
||||
get_key(4),
|
||||
Lsn(0x20),
|
||||
Value::WalRecord(NeonWalRecord::wal_init()),
|
||||
Value::WalRecord(NeonWalRecord::wal_init("i")),
|
||||
),
|
||||
];
|
||||
let image1 = vec![(get_key(1), "0x10".into())];
|
||||
@@ -7918,8 +7912,30 @@ mod tests {
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
#[tokio::test]
|
||||
async fn test_simple_bottom_most_compaction_deltas() -> anyhow::Result<()> {
|
||||
let harness = TenantHarness::create("test_simple_bottom_most_compaction_deltas").await?;
|
||||
async fn test_simple_bottom_most_compaction_deltas_1() -> anyhow::Result<()> {
|
||||
test_simple_bottom_most_compaction_deltas_helper(
|
||||
"test_simple_bottom_most_compaction_deltas_1",
|
||||
false,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
#[tokio::test]
|
||||
async fn test_simple_bottom_most_compaction_deltas_2() -> anyhow::Result<()> {
|
||||
test_simple_bottom_most_compaction_deltas_helper(
|
||||
"test_simple_bottom_most_compaction_deltas_2",
|
||||
true,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
async fn test_simple_bottom_most_compaction_deltas_helper(
|
||||
test_name: &'static str,
|
||||
use_delta_bottom_layer: bool,
|
||||
) -> anyhow::Result<()> {
|
||||
let harness = TenantHarness::create(test_name).await?;
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
|
||||
fn get_key(id: u32) -> Key {
|
||||
@@ -7950,6 +7966,16 @@ mod tests {
|
||||
let img_layer = (0..10)
|
||||
.map(|id| (get_key(id), Bytes::from(format!("value {id}@0x10"))))
|
||||
.collect_vec();
|
||||
// or, delta layer at 0x10 if `use_delta_bottom_layer` is true
|
||||
let delta4 = (0..10)
|
||||
.map(|id| {
|
||||
(
|
||||
get_key(id),
|
||||
Lsn(0x08),
|
||||
Value::WalRecord(NeonWalRecord::wal_init(format!("value {id}@0x10"))),
|
||||
)
|
||||
})
|
||||
.collect_vec();
|
||||
|
||||
let delta1 = vec![
|
||||
(
|
||||
@@ -8003,21 +8029,61 @@ mod tests {
|
||||
),
|
||||
];
|
||||
|
||||
let tline = tenant
|
||||
.create_test_timeline_with_layers(
|
||||
TIMELINE_ID,
|
||||
Lsn(0x10),
|
||||
DEFAULT_PG_VERSION,
|
||||
&ctx,
|
||||
vec![
|
||||
DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x10)..Lsn(0x48), delta1),
|
||||
DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x10)..Lsn(0x48), delta2),
|
||||
DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x48)..Lsn(0x50), delta3),
|
||||
], // delta layers
|
||||
vec![(Lsn(0x10), img_layer)], // image layers
|
||||
Lsn(0x50),
|
||||
)
|
||||
.await?;
|
||||
let tline = if use_delta_bottom_layer {
|
||||
tenant
|
||||
.create_test_timeline_with_layers(
|
||||
TIMELINE_ID,
|
||||
Lsn(0x08),
|
||||
DEFAULT_PG_VERSION,
|
||||
&ctx,
|
||||
vec![
|
||||
DeltaLayerTestDesc::new_with_inferred_key_range(
|
||||
Lsn(0x08)..Lsn(0x10),
|
||||
delta4,
|
||||
),
|
||||
DeltaLayerTestDesc::new_with_inferred_key_range(
|
||||
Lsn(0x20)..Lsn(0x48),
|
||||
delta1,
|
||||
),
|
||||
DeltaLayerTestDesc::new_with_inferred_key_range(
|
||||
Lsn(0x20)..Lsn(0x48),
|
||||
delta2,
|
||||
),
|
||||
DeltaLayerTestDesc::new_with_inferred_key_range(
|
||||
Lsn(0x48)..Lsn(0x50),
|
||||
delta3,
|
||||
),
|
||||
], // delta layers
|
||||
vec![], // image layers
|
||||
Lsn(0x50),
|
||||
)
|
||||
.await?
|
||||
} else {
|
||||
tenant
|
||||
.create_test_timeline_with_layers(
|
||||
TIMELINE_ID,
|
||||
Lsn(0x10),
|
||||
DEFAULT_PG_VERSION,
|
||||
&ctx,
|
||||
vec![
|
||||
DeltaLayerTestDesc::new_with_inferred_key_range(
|
||||
Lsn(0x10)..Lsn(0x48),
|
||||
delta1,
|
||||
),
|
||||
DeltaLayerTestDesc::new_with_inferred_key_range(
|
||||
Lsn(0x10)..Lsn(0x48),
|
||||
delta2,
|
||||
),
|
||||
DeltaLayerTestDesc::new_with_inferred_key_range(
|
||||
Lsn(0x48)..Lsn(0x50),
|
||||
delta3,
|
||||
),
|
||||
], // delta layers
|
||||
vec![(Lsn(0x10), img_layer)], // image layers
|
||||
Lsn(0x50),
|
||||
)
|
||||
.await?
|
||||
};
|
||||
{
|
||||
// Update GC info
|
||||
let mut guard = tline.gc_info.write().unwrap();
|
||||
@@ -8127,7 +8193,7 @@ mod tests {
|
||||
(
|
||||
key,
|
||||
Lsn(0x10),
|
||||
Value::Image(Bytes::copy_from_slice(b"0x10")),
|
||||
Value::WalRecord(NeonWalRecord::wal_init("0x10")),
|
||||
),
|
||||
(
|
||||
key,
|
||||
@@ -8189,7 +8255,7 @@ mod tests {
|
||||
Lsn(0x20),
|
||||
KeyLogAtLsn(vec![(
|
||||
Lsn(0x20),
|
||||
Value::Image(Bytes::copy_from_slice(b"0x10;0x20")),
|
||||
Value::Image(Bytes::from_static(b"0x10;0x20")),
|
||||
)]),
|
||||
),
|
||||
(
|
||||
@@ -9171,7 +9237,7 @@ mod tests {
|
||||
|
||||
let will_init = will_init_keys.contains(&i);
|
||||
if will_init {
|
||||
delta_layer_spec.push((key, lsn, Value::WalRecord(NeonWalRecord::wal_init())));
|
||||
delta_layer_spec.push((key, lsn, Value::WalRecord(NeonWalRecord::wal_init(""))));
|
||||
|
||||
expected_key_values.insert(key, "".to_string());
|
||||
} else {
|
||||
@@ -9229,6 +9295,23 @@ mod tests {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn sort_layer_key(k1: &PersistentLayerKey, k2: &PersistentLayerKey) -> std::cmp::Ordering {
|
||||
(
|
||||
k1.is_delta,
|
||||
k1.key_range.start,
|
||||
k1.key_range.end,
|
||||
k1.lsn_range.start,
|
||||
k1.lsn_range.end,
|
||||
)
|
||||
.cmp(&(
|
||||
k2.is_delta,
|
||||
k2.key_range.start,
|
||||
k2.key_range.end,
|
||||
k2.lsn_range.start,
|
||||
k2.lsn_range.end,
|
||||
))
|
||||
}
|
||||
|
||||
async fn inspect_and_sort(
|
||||
tline: &Arc<Timeline>,
|
||||
filter: Option<std::ops::Range<Key>>,
|
||||
@@ -9237,25 +9320,30 @@ mod tests {
|
||||
if let Some(filter) = filter {
|
||||
all_layers.retain(|layer| overlaps_with(&layer.key_range, &filter));
|
||||
}
|
||||
all_layers.sort_by(|k1, k2| {
|
||||
(
|
||||
k1.is_delta,
|
||||
k1.key_range.start,
|
||||
k1.key_range.end,
|
||||
k1.lsn_range.start,
|
||||
k1.lsn_range.end,
|
||||
)
|
||||
.cmp(&(
|
||||
k2.is_delta,
|
||||
k2.key_range.start,
|
||||
k2.key_range.end,
|
||||
k2.lsn_range.start,
|
||||
k2.lsn_range.end,
|
||||
))
|
||||
});
|
||||
all_layers.sort_by(sort_layer_key);
|
||||
all_layers
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
fn check_layer_map_key_eq(
|
||||
mut left: Vec<PersistentLayerKey>,
|
||||
mut right: Vec<PersistentLayerKey>,
|
||||
) {
|
||||
left.sort_by(sort_layer_key);
|
||||
right.sort_by(sort_layer_key);
|
||||
if left != right {
|
||||
eprintln!("---LEFT---");
|
||||
for left in left.iter() {
|
||||
eprintln!("{}", left);
|
||||
}
|
||||
eprintln!("---RIGHT---");
|
||||
for right in right.iter() {
|
||||
eprintln!("{}", right);
|
||||
}
|
||||
assert_eq!(left, right);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
#[tokio::test]
|
||||
async fn test_simple_partial_bottom_most_compaction() -> anyhow::Result<()> {
|
||||
@@ -9348,127 +9436,206 @@ mod tests {
|
||||
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
// Do a partial compaction on key range 0..4, we should generate a image layer; no other layers
|
||||
// can be removed because they might be used for other key ranges.
|
||||
// Do a partial compaction on key range 0..2
|
||||
tline
|
||||
.partial_compact_with_gc(Some(get_key(0)..get_key(4)), &cancel, EnumSet::new(), &ctx)
|
||||
.partial_compact_with_gc(get_key(0)..get_key(2), &cancel, EnumSet::new(), &ctx)
|
||||
.await
|
||||
.unwrap();
|
||||
let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;
|
||||
assert_eq!(
|
||||
check_layer_map_key_eq(
|
||||
all_layers,
|
||||
vec![
|
||||
// newly-generated image layer for the partial compaction range 0-2
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(0)..get_key(4),
|
||||
key_range: get_key(0)..get_key(2),
|
||||
lsn_range: Lsn(0x20)..Lsn(0x21),
|
||||
is_delta: false
|
||||
is_delta: false,
|
||||
},
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(0)..get_key(10),
|
||||
lsn_range: Lsn(0x10)..Lsn(0x11),
|
||||
is_delta: false
|
||||
is_delta: false,
|
||||
},
|
||||
// delta1 is split and the second part is rewritten
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(1)..get_key(4),
|
||||
key_range: get_key(2)..get_key(4),
|
||||
lsn_range: Lsn(0x20)..Lsn(0x48),
|
||||
is_delta: true
|
||||
is_delta: true,
|
||||
},
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(5)..get_key(7),
|
||||
lsn_range: Lsn(0x20)..Lsn(0x48),
|
||||
is_delta: true
|
||||
is_delta: true,
|
||||
},
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(8)..get_key(10),
|
||||
lsn_range: Lsn(0x48)..Lsn(0x50),
|
||||
is_delta: true
|
||||
}
|
||||
]
|
||||
is_delta: true,
|
||||
},
|
||||
],
|
||||
);
|
||||
|
||||
// Do a partial compaction on key range 4..10
|
||||
// Do a partial compaction on key range 2..4
|
||||
tline
|
||||
.partial_compact_with_gc(Some(get_key(4)..get_key(10)), &cancel, EnumSet::new(), &ctx)
|
||||
.partial_compact_with_gc(get_key(2)..get_key(4), &cancel, EnumSet::new(), &ctx)
|
||||
.await
|
||||
.unwrap();
|
||||
let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;
|
||||
assert_eq!(
|
||||
check_layer_map_key_eq(
|
||||
all_layers,
|
||||
vec![
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(0)..get_key(4),
|
||||
key_range: get_key(0)..get_key(2),
|
||||
lsn_range: Lsn(0x20)..Lsn(0x21),
|
||||
is_delta: false
|
||||
is_delta: false,
|
||||
},
|
||||
PersistentLayerKey {
|
||||
// if (in the future) GC kicks in, this layer will be removed
|
||||
key_range: get_key(0)..get_key(10),
|
||||
lsn_range: Lsn(0x10)..Lsn(0x11),
|
||||
is_delta: false
|
||||
is_delta: false,
|
||||
},
|
||||
// image layer generated for the compaction range 2-4
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(4)..get_key(10),
|
||||
key_range: get_key(2)..get_key(4),
|
||||
lsn_range: Lsn(0x20)..Lsn(0x21),
|
||||
is_delta: false
|
||||
is_delta: false,
|
||||
},
|
||||
// we have key2/key3 above the retain_lsn, so we still need this delta layer
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(1)..get_key(4),
|
||||
key_range: get_key(2)..get_key(4),
|
||||
lsn_range: Lsn(0x20)..Lsn(0x48),
|
||||
is_delta: true
|
||||
is_delta: true,
|
||||
},
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(5)..get_key(7),
|
||||
lsn_range: Lsn(0x20)..Lsn(0x48),
|
||||
is_delta: true
|
||||
is_delta: true,
|
||||
},
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(8)..get_key(10),
|
||||
lsn_range: Lsn(0x48)..Lsn(0x50),
|
||||
is_delta: true
|
||||
}
|
||||
]
|
||||
is_delta: true,
|
||||
},
|
||||
],
|
||||
);
|
||||
|
||||
// Do a partial compaction on key range 4..9
|
||||
tline
|
||||
.partial_compact_with_gc(get_key(4)..get_key(9), &cancel, EnumSet::new(), &ctx)
|
||||
.await
|
||||
.unwrap();
|
||||
let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;
|
||||
check_layer_map_key_eq(
|
||||
all_layers,
|
||||
vec![
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(0)..get_key(2),
|
||||
lsn_range: Lsn(0x20)..Lsn(0x21),
|
||||
is_delta: false,
|
||||
},
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(0)..get_key(10),
|
||||
lsn_range: Lsn(0x10)..Lsn(0x11),
|
||||
is_delta: false,
|
||||
},
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(2)..get_key(4),
|
||||
lsn_range: Lsn(0x20)..Lsn(0x21),
|
||||
is_delta: false,
|
||||
},
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(2)..get_key(4),
|
||||
lsn_range: Lsn(0x20)..Lsn(0x48),
|
||||
is_delta: true,
|
||||
},
|
||||
// image layer generated for this compaction range
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(4)..get_key(9),
|
||||
lsn_range: Lsn(0x20)..Lsn(0x21),
|
||||
is_delta: false,
|
||||
},
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(8)..get_key(10),
|
||||
lsn_range: Lsn(0x48)..Lsn(0x50),
|
||||
is_delta: true,
|
||||
},
|
||||
],
|
||||
);
|
||||
|
||||
// Do a partial compaction on key range 9..10
|
||||
tline
|
||||
.partial_compact_with_gc(get_key(9)..get_key(10), &cancel, EnumSet::new(), &ctx)
|
||||
.await
|
||||
.unwrap();
|
||||
let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;
|
||||
check_layer_map_key_eq(
|
||||
all_layers,
|
||||
vec![
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(0)..get_key(2),
|
||||
lsn_range: Lsn(0x20)..Lsn(0x21),
|
||||
is_delta: false,
|
||||
},
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(0)..get_key(10),
|
||||
lsn_range: Lsn(0x10)..Lsn(0x11),
|
||||
is_delta: false,
|
||||
},
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(2)..get_key(4),
|
||||
lsn_range: Lsn(0x20)..Lsn(0x21),
|
||||
is_delta: false,
|
||||
},
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(2)..get_key(4),
|
||||
lsn_range: Lsn(0x20)..Lsn(0x48),
|
||||
is_delta: true,
|
||||
},
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(4)..get_key(9),
|
||||
lsn_range: Lsn(0x20)..Lsn(0x21),
|
||||
is_delta: false,
|
||||
},
|
||||
// image layer generated for the compaction range
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(9)..get_key(10),
|
||||
lsn_range: Lsn(0x20)..Lsn(0x21),
|
||||
is_delta: false,
|
||||
},
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(8)..get_key(10),
|
||||
lsn_range: Lsn(0x48)..Lsn(0x50),
|
||||
is_delta: true,
|
||||
},
|
||||
],
|
||||
);
|
||||
|
||||
// Do a partial compaction on key range 0..10, all image layers below LSN 20 can be replaced with new ones.
|
||||
tline
|
||||
.partial_compact_with_gc(Some(get_key(0)..get_key(10)), &cancel, EnumSet::new(), &ctx)
|
||||
.partial_compact_with_gc(get_key(0)..get_key(10), &cancel, EnumSet::new(), &ctx)
|
||||
.await
|
||||
.unwrap();
|
||||
let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;
|
||||
assert_eq!(
|
||||
check_layer_map_key_eq(
|
||||
all_layers,
|
||||
vec![
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(0)..get_key(4),
|
||||
lsn_range: Lsn(0x20)..Lsn(0x21),
|
||||
is_delta: false
|
||||
},
|
||||
// aha, we removed all unnecessary image/delta layers and got a very clean layer map!
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(0)..get_key(10),
|
||||
lsn_range: Lsn(0x20)..Lsn(0x21),
|
||||
is_delta: false
|
||||
is_delta: false,
|
||||
},
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(4)..get_key(10),
|
||||
lsn_range: Lsn(0x20)..Lsn(0x21),
|
||||
is_delta: false
|
||||
},
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(1)..get_key(4),
|
||||
key_range: get_key(2)..get_key(4),
|
||||
lsn_range: Lsn(0x20)..Lsn(0x48),
|
||||
is_delta: true
|
||||
},
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(5)..get_key(7),
|
||||
lsn_range: Lsn(0x20)..Lsn(0x48),
|
||||
is_delta: true
|
||||
is_delta: true,
|
||||
},
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(8)..get_key(10),
|
||||
lsn_range: Lsn(0x48)..Lsn(0x50),
|
||||
is_delta: true
|
||||
}
|
||||
]
|
||||
is_delta: true,
|
||||
},
|
||||
],
|
||||
);
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -653,6 +653,10 @@ impl DeltaLayerWriter {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.inner.as_ref().unwrap().num_keys == 0
|
||||
}
|
||||
|
||||
///
|
||||
/// Append a key-value pair to the file.
|
||||
///
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::ops::Range;
|
||||
use std::{ops::Range, sync::Arc};
|
||||
|
||||
use anyhow::bail;
|
||||
use pageserver_api::{
|
||||
@@ -9,7 +9,10 @@ use utils::lsn::Lsn;
|
||||
|
||||
use pageserver_api::value::Value;
|
||||
|
||||
use super::merge_iterator::MergeIterator;
|
||||
use super::{
|
||||
merge_iterator::{MergeIterator, MergeIteratorItem},
|
||||
PersistentLayerKey,
|
||||
};
|
||||
|
||||
/// A filter iterator over merge iterators (and can be easily extended to other types of iterators).
|
||||
///
|
||||
@@ -48,10 +51,10 @@ impl<'a> FilterIterator<'a> {
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn next(&mut self) -> anyhow::Result<Option<(Key, Lsn, Value)>> {
|
||||
while let Some(item) = self.inner.next().await? {
|
||||
async fn next_inner<R: MergeIteratorItem>(&mut self) -> anyhow::Result<Option<R>> {
|
||||
while let Some(item) = self.inner.next_inner::<R>().await? {
|
||||
while self.current_filter_idx < self.retain_key_filters.len()
|
||||
&& item.0 >= self.retain_key_filters[self.current_filter_idx].end
|
||||
&& item.key_lsn_value().0 >= self.retain_key_filters[self.current_filter_idx].end
|
||||
{
|
||||
// [filter region] [filter region] [filter region]
|
||||
// ^ item
|
||||
@@ -68,7 +71,7 @@ impl<'a> FilterIterator<'a> {
|
||||
// ^ current filter (nothing)
|
||||
return Ok(None);
|
||||
}
|
||||
if self.retain_key_filters[self.current_filter_idx].contains(&item.0) {
|
||||
if self.retain_key_filters[self.current_filter_idx].contains(&item.key_lsn_value().0) {
|
||||
// [filter region] [filter region] [filter region]
|
||||
// ^ item
|
||||
// ^ current filter
|
||||
@@ -81,6 +84,16 @@ impl<'a> FilterIterator<'a> {
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
pub async fn next(&mut self) -> anyhow::Result<Option<(Key, Lsn, Value)>> {
|
||||
self.next_inner().await
|
||||
}
|
||||
|
||||
pub async fn next_with_trace(
|
||||
&mut self,
|
||||
) -> anyhow::Result<Option<((Key, Lsn, Value), Arc<PersistentLayerKey>)>> {
|
||||
self.next_inner().await
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use std::{
|
||||
cmp::Ordering,
|
||||
collections::{binary_heap, BinaryHeap},
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use anyhow::bail;
|
||||
@@ -13,10 +14,11 @@ use pageserver_api::value::Value;
|
||||
use super::{
|
||||
delta_layer::{DeltaLayerInner, DeltaLayerIterator},
|
||||
image_layer::{ImageLayerInner, ImageLayerIterator},
|
||||
PersistentLayerDesc, PersistentLayerKey,
|
||||
};
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
enum LayerRef<'a> {
|
||||
pub(crate) enum LayerRef<'a> {
|
||||
Image(&'a ImageLayerInner),
|
||||
Delta(&'a DeltaLayerInner),
|
||||
}
|
||||
@@ -62,18 +64,20 @@ impl LayerIterRef<'_> {
|
||||
/// 1. Unified iterator for image and delta layers.
|
||||
/// 2. `Ord` for use in [`MergeIterator::heap`] (for the k-merge).
|
||||
/// 3. Lazy creation of the real delta/image iterator.
|
||||
enum IteratorWrapper<'a> {
|
||||
pub(crate) enum IteratorWrapper<'a> {
|
||||
NotLoaded {
|
||||
ctx: &'a RequestContext,
|
||||
first_key_lower_bound: (Key, Lsn),
|
||||
layer: LayerRef<'a>,
|
||||
source_desc: Arc<PersistentLayerKey>,
|
||||
},
|
||||
Loaded {
|
||||
iter: PeekableLayerIterRef<'a>,
|
||||
source_desc: Arc<PersistentLayerKey>,
|
||||
},
|
||||
}
|
||||
|
||||
struct PeekableLayerIterRef<'a> {
|
||||
pub(crate) struct PeekableLayerIterRef<'a> {
|
||||
iter: LayerIterRef<'a>,
|
||||
peeked: Option<(Key, Lsn, Value)>, // None == end
|
||||
}
|
||||
@@ -151,6 +155,12 @@ impl<'a> IteratorWrapper<'a> {
|
||||
layer: LayerRef::Image(image_layer),
|
||||
first_key_lower_bound: (image_layer.key_range().start, image_layer.lsn()),
|
||||
ctx,
|
||||
source_desc: PersistentLayerKey {
|
||||
key_range: image_layer.key_range().clone(),
|
||||
lsn_range: PersistentLayerDesc::image_layer_lsn_range(image_layer.lsn()),
|
||||
is_delta: false,
|
||||
}
|
||||
.into(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -162,12 +172,18 @@ impl<'a> IteratorWrapper<'a> {
|
||||
layer: LayerRef::Delta(delta_layer),
|
||||
first_key_lower_bound: (delta_layer.key_range().start, delta_layer.lsn_range().start),
|
||||
ctx,
|
||||
source_desc: PersistentLayerKey {
|
||||
key_range: delta_layer.key_range().clone(),
|
||||
lsn_range: delta_layer.lsn_range().clone(),
|
||||
is_delta: true,
|
||||
}
|
||||
.into(),
|
||||
}
|
||||
}
|
||||
|
||||
fn peek_next_key_lsn_value(&self) -> Option<(&Key, Lsn, Option<&Value>)> {
|
||||
match self {
|
||||
Self::Loaded { iter } => iter
|
||||
Self::Loaded { iter, .. } => iter
|
||||
.peek()
|
||||
.as_ref()
|
||||
.map(|(key, lsn, val)| (key, *lsn, Some(val))),
|
||||
@@ -191,6 +207,7 @@ impl<'a> IteratorWrapper<'a> {
|
||||
ctx,
|
||||
first_key_lower_bound,
|
||||
layer,
|
||||
source_desc,
|
||||
} = self
|
||||
else {
|
||||
unreachable!()
|
||||
@@ -206,7 +223,10 @@ impl<'a> IteratorWrapper<'a> {
|
||||
);
|
||||
}
|
||||
}
|
||||
*self = Self::Loaded { iter };
|
||||
*self = Self::Loaded {
|
||||
iter,
|
||||
source_desc: source_desc.clone(),
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -220,11 +240,19 @@ impl<'a> IteratorWrapper<'a> {
|
||||
/// The public interfaces to use are [`crate::tenant::storage_layer::delta_layer::DeltaLayerIterator`] and
|
||||
/// [`crate::tenant::storage_layer::image_layer::ImageLayerIterator`].
|
||||
async fn next(&mut self) -> anyhow::Result<Option<(Key, Lsn, Value)>> {
|
||||
let Self::Loaded { iter } = self else {
|
||||
let Self::Loaded { iter, .. } = self else {
|
||||
panic!("must load the iterator before using")
|
||||
};
|
||||
iter.next().await
|
||||
}
|
||||
|
||||
/// Get the persistent layer key corresponding to this iterator
|
||||
fn trace_source(&self) -> Arc<PersistentLayerKey> {
|
||||
match self {
|
||||
Self::Loaded { source_desc, .. } => source_desc.clone(),
|
||||
Self::NotLoaded { source_desc, .. } => source_desc.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A merge iterator over delta/image layer iterators.
|
||||
@@ -242,6 +270,32 @@ pub struct MergeIterator<'a> {
heap: BinaryHeap<IteratorWrapper<'a>>,
}

pub(crate) trait MergeIteratorItem {
fn new(item: (Key, Lsn, Value), iterator: &IteratorWrapper<'_>) -> Self;

fn key_lsn_value(&self) -> &(Key, Lsn, Value);
}

impl MergeIteratorItem for (Key, Lsn, Value) {
fn new(item: (Key, Lsn, Value), _: &IteratorWrapper<'_>) -> Self {
item
}

fn key_lsn_value(&self) -> &(Key, Lsn, Value) {
self
}
}

impl MergeIteratorItem for ((Key, Lsn, Value), Arc<PersistentLayerKey>) {
fn new(item: (Key, Lsn, Value), iter: &IteratorWrapper<'_>) -> Self {
(item, iter.trace_source().clone())
}

fn key_lsn_value(&self) -> &(Key, Lsn, Value) {
&self.0
}
}

impl<'a> MergeIterator<'a> {
pub fn create(
deltas: &[&'a DeltaLayerInner],
@@ -260,7 +314,7 @@ impl<'a> MergeIterator<'a> {
}
}

pub async fn next(&mut self) -> anyhow::Result<Option<(Key, Lsn, Value)>> {
pub(crate) async fn next_inner<R: MergeIteratorItem>(&mut self) -> anyhow::Result<Option<R>> {
while let Some(mut iter) = self.heap.peek_mut() {
if !iter.is_loaded() {
// Once we load the iterator, we can know the real first key-value pair in the iterator.
@@ -275,10 +329,22 @@ impl<'a> MergeIterator<'a> {
binary_heap::PeekMut::pop(iter);
continue;
};
return Ok(Some(item));
return Ok(Some(R::new(item, &iter)));
}
Ok(None)
}

/// Get the next key-value pair from the iterator.
pub async fn next(&mut self) -> anyhow::Result<Option<(Key, Lsn, Value)>> {
self.next_inner().await
}

/// Get the next key-value pair from the iterator, and trace where the key comes from.
pub async fn next_with_trace(
&mut self,
) -> anyhow::Result<Option<((Key, Lsn, Value), Arc<PersistentLayerKey>)>> {
self.next_inner().await
}
}

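The generic-return trick used here, one `next_inner` that both the plain `next()` and the tracing `next_with_trace()` call, with the return shape chosen by a small trait, can be shown in isolation. The following is a simplified, self-contained sketch with stand-in types, not the actual MergeIterator:

    use std::sync::Arc;

    type Item = (u64, u64, String);   // stand-in for (Key, Lsn, Value)
    type Source = Arc<&'static str>;  // stand-in for Arc<PersistentLayerKey>

    trait MergeItem {
        fn new(item: Item, source: &Source) -> Self;
    }

    impl MergeItem for Item {
        fn new(item: Item, _: &Source) -> Self { item }
    }

    impl MergeItem for (Item, Source) {
        fn new(item: Item, source: &Source) -> Self { (item, source.clone()) }
    }

    struct Merger {
        items: Vec<(Item, Source)>,
    }

    impl Merger {
        // One implementation serves both public entry points; the caller's
        // requested return type picks which MergeItem impl is used.
        fn next_inner<R: MergeItem>(&mut self) -> Option<R> {
            let (item, source) = self.items.pop()?;
            Some(R::new(item, &source))
        }
        fn next(&mut self) -> Option<Item> {
            self.next_inner()
        }
        fn next_with_trace(&mut self) -> Option<(Item, Source)> {
            self.next_inner()
        }
    }
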
#[cfg(test)]
|
||||
@@ -496,7 +562,7 @@ mod tests {
|
||||
(
|
||||
get_key(0),
|
||||
Lsn(0x10),
|
||||
Value::WalRecord(NeonWalRecord::wal_init()),
|
||||
Value::WalRecord(NeonWalRecord::wal_init("")),
|
||||
),
|
||||
(
|
||||
get_key(0),
|
||||
@@ -506,7 +572,7 @@ mod tests {
|
||||
(
|
||||
get_key(5),
|
||||
Lsn(0x10),
|
||||
Value::WalRecord(NeonWalRecord::wal_init()),
|
||||
Value::WalRecord(NeonWalRecord::wal_init("")),
|
||||
),
|
||||
(
|
||||
get_key(5),
|
||||
|
||||
@@ -4,7 +4,7 @@
//!
//! The old legacy algorithm is implemented directly in `timeline.rs`.

use std::collections::{BinaryHeap, HashSet};
use std::collections::{BinaryHeap, HashMap, HashSet};
use std::ops::{Deref, Range};
use std::sync::Arc;

@@ -56,7 +56,7 @@ use pageserver_api::value::Value;

use utils::lsn::Lsn;

use pageserver_compaction::helpers::overlaps_with;
use pageserver_compaction::helpers::{fully_contains, overlaps_with};
use pageserver_compaction::interface::*;

use super::CompactionError;
@@ -64,6 +64,23 @@ use super::CompactionError;
|
||||
/// Maximum number of deltas before generating an image layer in bottom-most compaction.
const COMPACTION_DELTA_THRESHOLD: usize = 5;

pub struct GcCompactionJobDescription {
/// All layers to read in the compaction job
selected_layers: Vec<Layer>,
/// GC cutoff of the job
gc_cutoff: Lsn,
/// LSNs to retain for the job
retain_lsns_below_horizon: Vec<Lsn>,
/// Maximum layer LSN processed in this compaction
max_layer_lsn: Lsn,
/// Only compact layers overlapping with this range
compaction_key_range: Range<Key>,
/// When partial compaction is enabled, these layers need to be rewritten to ensure no overlap.
/// This field is here solely for debugging. The field will not be read once the compaction
/// description is generated.
rewrite_layers: Vec<Arc<PersistentLayerDesc>>,
}

/// The result of bottom-most compaction for a single key at each LSN.
#[derive(Debug)]
#[cfg_attr(test, derive(PartialEq))]
@@ -1722,7 +1739,8 @@ impl Timeline {
flags: EnumSet<CompactFlags>,
ctx: &RequestContext,
) -> anyhow::Result<()> {
self.partial_compact_with_gc(None, cancel, flags, ctx).await
self.partial_compact_with_gc(Key::MIN..Key::MAX, cancel, flags, ctx)
.await
}

/// An experimental compaction building block that combines compaction with garbage collection.
@@ -1732,12 +1750,15 @@ impl Timeline {
/// layers and image layers, which generates image layers on the gc horizon, drop deltas below gc horizon,
/// and create delta layers with all deltas >= gc horizon.
///
/// If `key_range`, it will only compact the keys within the range, aka partial compaction. This functionality
/// is not complete yet, and if it is set, only image layers will be generated.
///
/// If `key_range` is provided, it will only compact the keys within the range, aka partial compaction.
/// Partial compaction will read and process all layers overlapping with the key range, even if it might
/// contain extra keys. After the gc-compaction phase completes, delta layers that are not fully contained
/// within the key range will be rewritten to ensure they do not overlap with the delta layers. Providing
/// Key::MIN..Key::MAX to the function indicates a full compaction, though technically, `Key::MAX` is not
/// part of the range.
pub(crate) async fn partial_compact_with_gc(
self: &Arc<Self>,
compaction_key_range: Option<Range<Key>>,
compaction_key_range: Range<Key>,
cancel: &CancellationToken,
flags: EnumSet<CompactFlags>,
ctx: &RequestContext,
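The selection rule described in the doc comment above (read every layer that overlaps the compaction key range, and additionally mark a delta layer for rewriting when the range does not fully contain it) reduces to two range predicates. A free-standing sketch using integer key ranges; `overlaps_with` and `fully_contains` are reimplemented here rather than taken from `pageserver_compaction::helpers`, and the `max_layer_lsn` condition used by the real selection loop is omitted:

    use std::ops::Range;

    fn overlaps_with(a: &Range<u64>, b: &Range<u64>) -> bool {
        a.start < b.end && b.start < a.end
    }

    fn fully_contains(outer: &Range<u64>, inner: &Range<u64>) -> bool {
        outer.start <= inner.start && inner.end <= outer.end
    }

    struct LayerDesc {
        key_range: Range<u64>,
        is_delta: bool,
    }

    /// Returns (indices of layers to read, indices of delta layers that must be rewritten).
    fn select(layers: &[LayerDesc], compaction_key_range: &Range<u64>) -> (Vec<usize>, Vec<usize>) {
        let (mut selected, mut rewrite) = (Vec::new(), Vec::new());
        for (i, desc) in layers.iter().enumerate() {
            if overlaps_with(&desc.key_range, compaction_key_range) {
                // Overlapping layers are read so that every key in the range is visible,
                // even if the layer also carries keys outside the range.
                selected.push(i);
                if desc.is_delta && !fully_contains(compaction_key_range, &desc.key_range) {
                    // Partially covered delta layers must be rewritten at the range boundary.
                    rewrite.push(i);
                }
            }
        }
        (selected, rewrite)
    }
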
@@ -1762,9 +1783,8 @@ impl Timeline {
|
||||
.await?;
|
||||
|
||||
let dry_run = flags.contains(CompactFlags::DryRun);
|
||||
let partial_compaction = compaction_key_range.is_some();
|
||||
|
||||
if let Some(ref compaction_key_range) = compaction_key_range {
|
||||
if compaction_key_range == (Key::MIN..Key::MAX) {
|
||||
info!("running enhanced gc bottom-most compaction, dry_run={dry_run}, compaction_key_range={}..{}", compaction_key_range.start, compaction_key_range.end);
|
||||
} else {
|
||||
info!("running enhanced gc bottom-most compaction, dry_run={dry_run}");
|
||||
@@ -1780,7 +1800,7 @@ impl Timeline {
|
||||
// The layer selection has the following properties:
|
||||
// 1. If a layer is in the selection, all layers below it are in the selection.
|
||||
// 2. Inferred from (1), for each key in the layer selection, the value can be reconstructed only with the layers in the layer selection.
|
||||
let (layer_selection, gc_cutoff, retain_lsns_below_horizon) = if !partial_compaction {
|
||||
let job_desc = {
|
||||
let guard = self.layers.read().await;
|
||||
let layers = guard.layer_map()?;
|
||||
let gc_info = self.gc_info.read().unwrap();
|
||||
@@ -1810,9 +1830,21 @@ impl Timeline {
|
||||
};
|
||||
// Then, pick all the layers that are below the max_layer_lsn. This is to ensure we can pick all single-key
|
||||
// layers to compact.
|
||||
let mut rewrite_layers = Vec::new();
|
||||
for desc in layers.iter_historic_layers() {
|
||||
if desc.get_lsn_range().end <= max_layer_lsn {
|
||||
if desc.get_lsn_range().end <= max_layer_lsn
|
||||
&& overlaps_with(&desc.get_key_range(), &compaction_key_range)
|
||||
{
|
||||
// If the layer overlaps with the compaction key range, we need to read it to obtain all keys within the range,
|
||||
// even if it might contain extra keys
|
||||
selected_layers.push(guard.get_from_desc(&desc));
|
||||
// If the layer is not fully contained within the key range, we need to rewrite it if it's a delta layer (it's fine
|
||||
// to overlap image layers)
|
||||
if desc.is_delta()
|
||||
&& !fully_contains(&compaction_key_range, &desc.get_key_range())
|
||||
{
|
||||
rewrite_layers.push(desc);
|
||||
}
|
||||
}
|
||||
}
|
||||
if selected_layers.is_empty() {
|
||||
@@ -1820,82 +1852,59 @@ impl Timeline {
|
||||
return Ok(());
|
||||
}
|
||||
retain_lsns_below_horizon.sort();
|
||||
(selected_layers, gc_cutoff, retain_lsns_below_horizon)
|
||||
} else {
|
||||
// In case of partial compaction, we currently only support generating image layers, and therefore,
|
||||
// we pick all layers that are below the lowest retain_lsn and does not intersect with any of the layers.
|
||||
let guard = self.layers.read().await;
|
||||
let layers = guard.layer_map()?;
|
||||
let gc_info = self.gc_info.read().unwrap();
|
||||
let mut min_lsn = gc_info.cutoffs.select_min();
|
||||
for (lsn, _, _) in &gc_info.retain_lsns {
|
||||
if lsn < &min_lsn {
|
||||
min_lsn = *lsn;
|
||||
}
|
||||
GcCompactionJobDescription {
|
||||
selected_layers,
|
||||
gc_cutoff,
|
||||
retain_lsns_below_horizon,
|
||||
max_layer_lsn,
|
||||
compaction_key_range,
|
||||
rewrite_layers,
|
||||
}
|
||||
for lsn in gc_info.leases.keys() {
|
||||
if lsn < &min_lsn {
|
||||
min_lsn = *lsn;
|
||||
}
|
||||
}
|
||||
let mut selected_layers = Vec::new();
|
||||
drop(gc_info);
|
||||
// |-------| |-------| |-------|
|
||||
// | Delta | | Delta | | Delta | -- min_lsn could be intersecting with the layers
|
||||
// |-------| |-------| |-------| <- we want to pick all the layers below min_lsn, so that
|
||||
// | Delta | | Delta | | Delta | ...we can remove them after compaction
|
||||
// |-------| |-------| |-------|
|
||||
// Pick all the layers intersect or below the min_lsn, get the largest LSN in the selected layers.
|
||||
let Some(compaction_key_range) = compaction_key_range.as_ref() else {
|
||||
unreachable!()
|
||||
};
|
||||
for desc in layers.iter_historic_layers() {
|
||||
if desc.get_lsn_range().end <= min_lsn
|
||||
&& overlaps_with(&desc.key_range, compaction_key_range)
|
||||
{
|
||||
selected_layers.push(guard.get_from_desc(&desc));
|
||||
}
|
||||
}
|
||||
if selected_layers.is_empty() {
|
||||
info!("no layers to compact with gc");
|
||||
return Ok(());
|
||||
}
|
||||
(selected_layers, min_lsn, Vec::new())
|
||||
};
|
||||
let lowest_retain_lsn = if self.ancestor_timeline.is_some() {
|
||||
if partial_compaction {
|
||||
warn!("partial compaction cannot run on child branches (for now)");
|
||||
return Ok(());
|
||||
}
|
||||
Lsn(self.ancestor_lsn.0 + 1)
|
||||
} else {
|
||||
let res = retain_lsns_below_horizon
|
||||
let res = job_desc
|
||||
.retain_lsns_below_horizon
|
||||
.first()
|
||||
.copied()
|
||||
.unwrap_or(gc_cutoff);
|
||||
.unwrap_or(job_desc.gc_cutoff);
|
||||
if cfg!(debug_assertions) {
|
||||
assert_eq!(
|
||||
res,
|
||||
retain_lsns_below_horizon
|
||||
job_desc
|
||||
.retain_lsns_below_horizon
|
||||
.iter()
|
||||
.min()
|
||||
.copied()
|
||||
.unwrap_or(gc_cutoff)
|
||||
.unwrap_or(job_desc.gc_cutoff)
|
||||
);
|
||||
}
|
||||
res
|
||||
};
|
||||
info!(
|
||||
"picked {} layers for compaction with gc_cutoff={} lowest_retain_lsn={}",
|
||||
layer_selection.len(),
|
||||
gc_cutoff,
|
||||
lowest_retain_lsn
|
||||
"picked {} layers for compaction ({} layers need rewriting) with max_layer_lsn={} gc_cutoff={} lowest_retain_lsn={}, key_range={}..{}",
|
||||
job_desc.selected_layers.len(),
|
||||
job_desc.rewrite_layers.len(),
|
||||
job_desc.max_layer_lsn,
|
||||
job_desc.gc_cutoff,
|
||||
lowest_retain_lsn,
|
||||
job_desc.compaction_key_range.start,
|
||||
job_desc.compaction_key_range.end
|
||||
);
|
||||
|
||||
self.check_compaction_space(&layer_selection).await?;
|
||||
for layer in &job_desc.selected_layers {
|
||||
debug!("read layer: {}", layer.layer_desc().key());
|
||||
}
|
||||
for layer in &job_desc.rewrite_layers {
|
||||
debug!("rewrite layer: {}", layer.key());
|
||||
}
|
||||
|
||||
self.check_compaction_space(&job_desc.selected_layers)
|
||||
.await?;
|
||||
|
||||
// Generate statistics for the compaction
|
||||
for layer in &layer_selection {
|
||||
for layer in &job_desc.selected_layers {
|
||||
let desc = layer.layer_desc();
|
||||
if desc.is_delta() {
|
||||
stat.visit_delta_layer(desc.file_size());
|
||||
@@ -1906,25 +1915,25 @@ impl Timeline {
|
||||
|
||||
// Step 1: construct a k-merge iterator over all layers.
|
||||
// Also, verify if the layer map can be split by drawing a horizontal line at every LSN start/end split point.
|
||||
let layer_names: Vec<crate::tenant::storage_layer::LayerName> = layer_selection
|
||||
let layer_names = job_desc
|
||||
.selected_layers
|
||||
.iter()
|
||||
.map(|layer| layer.layer_desc().layer_name())
|
||||
.collect_vec();
|
||||
if let Some(err) = check_valid_layermap(&layer_names) {
|
||||
bail!("cannot run gc-compaction because {}", err);
|
||||
warn!("gc-compaction layer map check failed because {}, this is normal if partial compaction is not finished yet", err);
|
||||
}
|
||||
// The maximum LSN we are processing in this compaction loop
|
||||
let end_lsn = layer_selection
|
||||
let end_lsn = job_desc
|
||||
.selected_layers
|
||||
.iter()
|
||||
.map(|l| l.layer_desc().lsn_range.end)
|
||||
.max()
|
||||
.unwrap();
|
||||
// We don't want any of the produced layers to cover the full key range (i.e., MIN..MAX) b/c it will then be recognized
|
||||
// as an L0 layer.
|
||||
let mut delta_layers = Vec::new();
|
||||
let mut image_layers = Vec::new();
|
||||
let mut downloaded_layers = Vec::new();
|
||||
for layer in &layer_selection {
|
||||
for layer in &job_desc.selected_layers {
|
||||
let resident_layer = layer.download_and_keep_resident().await?;
|
||||
downloaded_layers.push(resident_layer);
|
||||
}
|
||||
@@ -1943,8 +1952,8 @@ impl Timeline {
|
||||
dense_ks,
|
||||
sparse_ks,
|
||||
)?;
|
||||
// Step 2: Produce images+deltas. TODO: ensure newly-produced delta does not overlap with other deltas.
|
||||
// Data of the same key.
|
||||
|
||||
// Step 2: Produce images+deltas.
|
||||
let mut accumulated_values = Vec::new();
|
||||
let mut last_key: Option<Key> = None;
|
||||
|
||||
@@ -1956,10 +1965,7 @@ impl Timeline {
|
||||
self.conf,
|
||||
self.timeline_id,
|
||||
self.tenant_shard_id,
|
||||
compaction_key_range
|
||||
.as_ref()
|
||||
.map(|x| x.start)
|
||||
.unwrap_or(Key::MIN),
|
||||
job_desc.compaction_key_range.start,
|
||||
lowest_retain_lsn,
|
||||
self.get_compaction_target_size(),
|
||||
ctx,
|
||||
@@ -1979,6 +1985,13 @@ impl Timeline {
|
||||
)
|
||||
.await?;
|
||||
|
||||
#[derive(Default)]
|
||||
struct RewritingLayers {
|
||||
before: Option<DeltaLayerWriter>,
|
||||
after: Option<DeltaLayerWriter>,
|
||||
}
|
||||
let mut delta_layer_rewriters = HashMap::<Arc<PersistentLayerKey>, RewritingLayers>::new();
|
||||
|
||||
/// Returns None if there is no ancestor branch. Throw an error when the key is not found.
|
||||
///
|
||||
/// Currently, we always get the ancestor image for each key in the child branch no matter whether the image
|
||||
@@ -2004,10 +2017,51 @@ impl Timeline {
|
||||
// the key and LSN range are determined. However, to keep things simple here, we still
|
||||
// create this writer, and discard the writer in the end.
|
||||
|
||||
while let Some((key, lsn, val)) = merge_iter.next().await? {
|
||||
while let Some(((key, lsn, val), desc)) = merge_iter.next_with_trace().await? {
|
||||
if cancel.is_cancelled() {
|
||||
return Err(anyhow!("cancelled")); // TODO: refactor to CompactionError and pass cancel error
|
||||
}
|
||||
if !job_desc.compaction_key_range.contains(&key) {
|
||||
if !desc.is_delta {
|
||||
continue;
|
||||
}
|
||||
let rewriter = delta_layer_rewriters.entry(desc.clone()).or_default();
|
||||
let rewriter = if key < job_desc.compaction_key_range.start {
|
||||
if rewriter.before.is_none() {
|
||||
rewriter.before = Some(
|
||||
DeltaLayerWriter::new(
|
||||
self.conf,
|
||||
self.timeline_id,
|
||||
self.tenant_shard_id,
|
||||
desc.key_range.start,
|
||||
desc.lsn_range.clone(),
|
||||
ctx,
|
||||
)
|
||||
.await?,
|
||||
);
|
||||
}
|
||||
rewriter.before.as_mut().unwrap()
|
||||
} else if key >= job_desc.compaction_key_range.end {
|
||||
if rewriter.after.is_none() {
|
||||
rewriter.after = Some(
|
||||
DeltaLayerWriter::new(
|
||||
self.conf,
|
||||
self.timeline_id,
|
||||
self.tenant_shard_id,
|
||||
job_desc.compaction_key_range.end,
|
||||
desc.lsn_range.clone(),
|
||||
ctx,
|
||||
)
|
||||
.await?,
|
||||
);
|
||||
}
|
||||
rewriter.after.as_mut().unwrap()
|
||||
} else {
|
||||
unreachable!()
|
||||
};
|
||||
rewriter.put_value(key, lsn, val, ctx).await?;
|
||||
continue;
|
||||
}
|
||||
match val {
|
||||
Value::Image(_) => stat.visit_image_key(&val),
|
||||
Value::WalRecord(_) => stat.visit_wal_key(&val),
|
||||
@@ -2018,35 +2072,27 @@ impl Timeline {
|
||||
}
|
||||
accumulated_values.push((key, lsn, val));
|
||||
} else {
|
||||
let last_key = last_key.as_mut().unwrap();
|
||||
stat.on_unique_key_visited();
|
||||
let skip_adding_key = if let Some(ref compaction_key_range) = compaction_key_range {
|
||||
!compaction_key_range.contains(last_key)
|
||||
} else {
|
||||
false
|
||||
};
|
||||
if !skip_adding_key {
|
||||
let retention = self
|
||||
.generate_key_retention(
|
||||
*last_key,
|
||||
&accumulated_values,
|
||||
gc_cutoff,
|
||||
&retain_lsns_below_horizon,
|
||||
COMPACTION_DELTA_THRESHOLD,
|
||||
get_ancestor_image(self, *last_key, ctx).await?,
|
||||
)
|
||||
.await?;
|
||||
// Put the image into the image layer. Currently we have a single big layer for the compaction.
|
||||
retention
|
||||
.pipe_to(
|
||||
*last_key,
|
||||
&mut delta_layer_writer,
|
||||
image_layer_writer.as_mut(),
|
||||
&mut stat,
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
let last_key: &mut Key = last_key.as_mut().unwrap();
|
||||
stat.on_unique_key_visited(); // TODO: adjust statistics for partial compaction
|
||||
let retention = self
|
||||
.generate_key_retention(
|
||||
*last_key,
|
||||
&accumulated_values,
|
||||
job_desc.gc_cutoff,
|
||||
&job_desc.retain_lsns_below_horizon,
|
||||
COMPACTION_DELTA_THRESHOLD,
|
||||
get_ancestor_image(self, *last_key, ctx).await?,
|
||||
)
|
||||
.await?;
|
||||
retention
|
||||
.pipe_to(
|
||||
*last_key,
|
||||
&mut delta_layer_writer,
|
||||
image_layer_writer.as_mut(),
|
||||
&mut stat,
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
accumulated_values.clear();
|
||||
*last_key = key;
|
||||
accumulated_values.push((key, lsn, val));
|
||||
@@ -2057,35 +2103,43 @@ impl Timeline {
|
||||
let last_key = last_key.expect("no keys produced during compaction");
|
||||
stat.on_unique_key_visited();
|
||||
|
||||
let skip_adding_key = if let Some(ref compaction_key_range) = compaction_key_range {
|
||||
!compaction_key_range.contains(&last_key)
|
||||
} else {
|
||||
false
|
||||
};
|
||||
if !skip_adding_key {
|
||||
let retention = self
|
||||
.generate_key_retention(
|
||||
last_key,
|
||||
&accumulated_values,
|
||||
gc_cutoff,
|
||||
&retain_lsns_below_horizon,
|
||||
COMPACTION_DELTA_THRESHOLD,
|
||||
get_ancestor_image(self, last_key, ctx).await?,
|
||||
)
|
||||
.await?;
|
||||
// Put the image into the image layer. Currently we have a single big layer for the compaction.
|
||||
retention
|
||||
.pipe_to(
|
||||
last_key,
|
||||
&mut delta_layer_writer,
|
||||
image_layer_writer.as_mut(),
|
||||
&mut stat,
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
let retention = self
|
||||
.generate_key_retention(
|
||||
last_key,
|
||||
&accumulated_values,
|
||||
job_desc.gc_cutoff,
|
||||
&job_desc.retain_lsns_below_horizon,
|
||||
COMPACTION_DELTA_THRESHOLD,
|
||||
get_ancestor_image(self, last_key, ctx).await?,
|
||||
)
|
||||
.await?;
|
||||
retention
|
||||
.pipe_to(
|
||||
last_key,
|
||||
&mut delta_layer_writer,
|
||||
image_layer_writer.as_mut(),
|
||||
&mut stat,
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
// end: move the above part to the loop body
|
||||
|
||||
let mut rewrote_delta_layers = Vec::new();
|
||||
for (key, writers) in delta_layer_rewriters {
|
||||
if let Some(delta_writer_before) = writers.before {
|
||||
let (desc, path) = delta_writer_before
|
||||
.finish(job_desc.compaction_key_range.start, ctx)
|
||||
.await?;
|
||||
let layer = Layer::finish_creating(self.conf, self, desc, &path)?;
|
||||
rewrote_delta_layers.push(layer);
|
||||
}
|
||||
if let Some(delta_writer_after) = writers.after {
|
||||
let (desc, path) = delta_writer_after.finish(key.key_range.end, ctx).await?;
|
||||
let layer = Layer::finish_creating(self.conf, self, desc, &path)?;
|
||||
rewrote_delta_layers.push(layer);
|
||||
}
|
||||
}
|
||||
|
||||
let discard = |key: &PersistentLayerKey| {
|
||||
let key = key.clone();
|
||||
async move { KeyHistoryRetention::discard_key(&key, self, dry_run).await }
|
||||
@@ -2093,10 +2147,7 @@ impl Timeline {
|
||||
|
||||
let produced_image_layers = if let Some(writer) = image_layer_writer {
|
||||
if !dry_run {
|
||||
let end_key = compaction_key_range
|
||||
.as_ref()
|
||||
.map(|x| x.end)
|
||||
.unwrap_or(Key::MAX);
|
||||
let end_key = job_desc.compaction_key_range.end;
|
||||
writer
|
||||
.finish_with_discard_fn(self, ctx, end_key, discard)
|
||||
.await?
|
||||
@@ -2117,10 +2168,8 @@ impl Timeline {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
if partial_compaction && !produced_delta_layers.is_empty() {
|
||||
bail!("implementation error: partial compaction should not be producing delta layers (for now)");
|
||||
}
|
||||
|
||||
// TODO: make image/delta/rewrote_delta layers generation atomic. At this point, we already generated resident layers, and if
|
||||
// compaction is cancelled at this point, we might have some layers that are not cleaned up.
|
||||
let mut compact_to = Vec::new();
|
||||
let mut keep_layers = HashSet::new();
|
||||
let produced_delta_layers_len = produced_delta_layers.len();
|
||||
@@ -2128,52 +2177,84 @@ impl Timeline {
|
||||
for action in produced_delta_layers {
|
||||
match action {
|
||||
BatchWriterResult::Produced(layer) => {
|
||||
if cfg!(debug_assertions) {
|
||||
info!("produced delta layer: {}", layer.layer_desc().key());
|
||||
}
|
||||
stat.produce_delta_layer(layer.layer_desc().file_size());
|
||||
compact_to.push(layer);
|
||||
}
|
||||
BatchWriterResult::Discarded(l) => {
|
||||
if cfg!(debug_assertions) {
|
||||
info!("discarded delta layer: {}", l);
|
||||
}
|
||||
keep_layers.insert(l);
|
||||
stat.discard_delta_layer();
|
||||
}
|
||||
}
|
||||
}
|
||||
for layer in &rewrote_delta_layers {
|
||||
debug!(
|
||||
"produced rewritten delta layer: {}",
|
||||
layer.layer_desc().key()
|
||||
);
|
||||
}
|
||||
compact_to.extend(rewrote_delta_layers);
|
||||
for action in produced_image_layers {
|
||||
match action {
|
||||
BatchWriterResult::Produced(layer) => {
|
||||
debug!("produced image layer: {}", layer.layer_desc().key());
|
||||
stat.produce_image_layer(layer.layer_desc().file_size());
|
||||
compact_to.push(layer);
|
||||
}
|
||||
BatchWriterResult::Discarded(l) => {
|
||||
debug!("discarded image layer: {}", l);
|
||||
keep_layers.insert(l);
|
||||
stat.discard_image_layer();
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut layer_selection = layer_selection;
layer_selection.retain(|x| !keep_layers.contains(&x.layer_desc().key()));
if let Some(ref compaction_key_range) = compaction_key_range {
// Partial compaction might select more data than it processes, e.g., if
// the compaction_key_range only partially overlaps:
//
// [---compaction_key_range---]
// [---A----][----B----][----C----][----D----]
//
// A,B,C,D are all in the `layer_selection`. The created image layers contain
// whatever is needed from B, C, and from `----]` of A, and from `[--` of D.
//
// In contrast, `[--A-` and `--D----]` have not been processed, so, we must
// keep that data.
//
// The solution for now is to keep A and D completely.
// (layer_selection is what we'll remove from the layer map, so,
// retain what is _not_ fully covered by compaction_key_range).
layer_selection.retain(|x| {
let key_range = &x.layer_desc().key_range;
key_range.start >= compaction_key_range.start
&& key_range.end <= compaction_key_range.end
});

let mut layer_selection = job_desc.selected_layers;

// Partial compaction might select more data than it processes, e.g., if
// the compaction_key_range only partially overlaps:
//
// [---compaction_key_range---]
// [---A----][----B----][----C----][----D----]
//
// For delta layers, we will rewrite the layers so that it is cut exactly at
// the compaction key range, so we can always discard them. However, for image
// layers, as we do not rewrite them for now, we need to handle them differently.
// Assume image layers A, B, C, D are all in the `layer_selection`.
//
// The created image layers contain whatever is needed from B, C, and from
// `----]` of A, and from `[---` of D.
//
// In contrast, `[---A` and `D----]` have not been processed, so, we must
// keep that data.
//
// The solution for now is to keep A and D completely if they are image layers.
// (layer_selection is what we'll remove from the layer map, so, retain what
// is _not_ fully covered by compaction_key_range).
for layer in &layer_selection {
if !layer.layer_desc().is_delta() {
if !overlaps_with(
&layer.layer_desc().key_range,
&job_desc.compaction_key_range,
) {
bail!("violated constraint: image layer outside of compaction key range");
}
if !fully_contains(
&job_desc.compaction_key_range,
&layer.layer_desc().key_range,
) {
keep_layers.insert(layer.layer_desc().key());
}
}
}

layer_selection.retain(|x| !keep_layers.contains(&x.layer_desc().key()));

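Stated as a predicate, the retention rule in the comment above keeps an already-selected image layer only when the compaction key range does not fully contain it, while rewritten delta layers can always be dropped. A compact, self-contained restatement with the same hypothetical integer-range helper as in the earlier sketch:

    use std::ops::Range;

    fn fully_contains(outer: &Range<u64>, inner: &Range<u64>) -> bool {
        outer.start <= inner.start && inner.end <= outer.end
    }

    /// True if an already-selected layer must stay in the layer map after a partial gc-compaction.
    fn must_keep(is_delta: bool, layer_range: &Range<u64>, compaction_key_range: &Range<u64>) -> bool {
        !is_delta && !fully_contains(compaction_key_range, layer_range)
    }
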
info!(
|
||||
"gc-compaction statistics: {}",
|
||||
serde_json::to_string(&stat)?
|
||||
@@ -2192,6 +2273,7 @@ impl Timeline {
|
||||
|
||||
// Step 3: Place back to the layer map.
|
||||
{
|
||||
// TODO: sanity check if the layer map is valid (i.e., should not have overlaps)
|
||||
let mut guard = self.layers.write().await;
|
||||
guard
|
||||
.open_mut()?
|
||||
|
||||
@@ -253,6 +253,10 @@ pub(crate) fn apply_in_neon(
|
||||
use bytes::BufMut;
|
||||
if *will_init {
|
||||
assert!(*clear, "init record must be clear to ensure correctness");
|
||||
assert!(
|
||||
page.is_empty(),
|
||||
"init record must be the first entry to ensure correctness"
|
||||
);
|
||||
}
|
||||
if *clear {
|
||||
page.clear();
|
||||
|
||||
@@ -1,7 +1,8 @@
#include <dirent.h>
#include <limits.h>
#include <string.h>
#include <dirent.h>
#include <signal.h>
#include <sys/stat.h>

#include "postgres.h"

@@ -21,17 +22,35 @@

static int logical_replication_max_snap_files = 300;

/*
* According to Chi (shyzh), the pageserver _should_ be good with 10 MB worth of
* snapshot files. Let's use 8 MB since 8 is a power of 2.
*/
static int logical_replication_max_logicalsnapdir_size = 8000;

/*
* A primitive description of a logical snapshot file including the LSN of the
* file and its size.
*/
typedef struct SnapDesc {
XLogRecPtr lsn;
off_t sz;
} SnapDesc;

PGDLLEXPORT void LogicalSlotsMonitorMain(Datum main_arg);

/*
* Sorts an array of snapshot descriptors by their LSN.
*/
static int
LsnDescComparator(const void *a, const void *b)
SnapDescComparator(const void *a, const void *b)
{
XLogRecPtr lsn1 = *((const XLogRecPtr *) a);
XLogRecPtr lsn2 = *((const XLogRecPtr *) b);
const SnapDesc *desc1 = a;
const SnapDesc *desc2 = b;

if (lsn1 < lsn2)
if (desc1->lsn < desc2->lsn)
return 1;
else if (lsn1 == lsn2)
else if (desc1->lsn == desc2->lsn)
return 0;
else
return -1;
@@ -43,28 +62,39 @@ LsnDescComparator(const void *a, const void *b)
|
||||
* slots having lower restart_lsn should be dropped.
|
||||
*/
|
||||
static XLogRecPtr
|
||||
get_num_snap_files_lsn_threshold(void)
|
||||
get_snapshots_cutoff_lsn(void)
|
||||
{
|
||||
DIR *dirdesc;
|
||||
struct dirent *de;
|
||||
char *snap_path = "pg_logical/snapshots/";
|
||||
int lsns_allocated = 1024;
|
||||
int lsns_num = 0;
|
||||
XLogRecPtr *lsns;
|
||||
XLogRecPtr cutoff;
|
||||
/* PG 18 has a constant defined for this, PG_LOGICAL_SNAPSHOTS_DIR */
|
||||
#define SNAPDIR "pg_logical/snapshots"
|
||||
|
||||
if (logical_replication_max_snap_files < 0)
|
||||
DIR *dirdesc;
|
||||
int dirdesc_fd;
|
||||
struct dirent *de;
|
||||
size_t snapshot_index = 0;
|
||||
SnapDesc *snapshot_descriptors;
|
||||
size_t descriptors_allocated = 1024;
|
||||
XLogRecPtr cutoff = 0;
|
||||
off_t logicalsnapdir_size = 0;
|
||||
const int logical_replication_max_logicalsnapdir_size_bytes = logical_replication_max_logicalsnapdir_size * 1000;
|
||||
|
||||
if (logical_replication_max_snap_files < 0 && logical_replication_max_logicalsnapdir_size < 0)
|
||||
return 0;
|
||||
|
||||
lsns = palloc(sizeof(XLogRecPtr) * lsns_allocated);
|
||||
snapshot_descriptors = palloc(sizeof(*snapshot_descriptors) * descriptors_allocated);
|
||||
|
||||
dirdesc = AllocateDir(SNAPDIR);
|
||||
dirdesc_fd = dirfd(dirdesc);
|
||||
if (dirdesc_fd == -1)
|
||||
ereport(ERROR, errmsg("failed to get a file descriptor for " SNAPDIR ": %m"));
|
||||
|
||||
/* find all .snap files and get their lsns */
|
||||
dirdesc = AllocateDir(snap_path);
|
||||
while ((de = ReadDir(dirdesc, snap_path)) != NULL)
|
||||
while ((de = ReadDir(dirdesc, SNAPDIR)) != NULL)
|
||||
{
|
||||
XLogRecPtr lsn;
|
||||
uint32 hi;
|
||||
uint32 lo;
|
||||
struct stat st;
|
||||
XLogRecPtr lsn;
|
||||
SnapDesc *desc;
|
||||
|
||||
if (strcmp(de->d_name, ".") == 0 ||
|
||||
strcmp(de->d_name, "..") == 0)
|
||||
@@ -79,28 +109,69 @@ get_num_snap_files_lsn_threshold(void)
|
||||
|
||||
lsn = ((uint64) hi) << 32 | lo;
|
||||
elog(DEBUG5, "found snap file %X/%X", LSN_FORMAT_ARGS(lsn));
|
||||
if (lsns_allocated == lsns_num)
|
||||
|
||||
if (fstatat(dirdesc_fd, de->d_name, &st, 0) == -1)
|
||||
ereport(ERROR, errmsg("failed to get the size of " SNAPDIR "/%s: %m", de->d_name));
|
||||
|
||||
if (descriptors_allocated == snapshot_index)
|
||||
{
|
||||
lsns_allocated *= 2;
|
||||
lsns = repalloc(lsns, sizeof(XLogRecPtr) * lsns_allocated);
|
||||
descriptors_allocated *= 2;
|
||||
snapshot_descriptors = repalloc(snapshot_descriptors, sizeof(*snapshot_descriptors) * descriptors_allocated);
|
||||
}
|
||||
lsns[lsns_num++] = lsn;
|
||||
|
||||
desc = &snapshot_descriptors[snapshot_index++];
|
||||
desc->lsn = lsn;
|
||||
desc->sz = st.st_size;
|
||||
}
|
||||
/* sort by lsn desc */
|
||||
qsort(lsns, lsns_num, sizeof(XLogRecPtr), LsnDescComparator);
|
||||
/* and take cutoff at logical_replication_max_snap_files */
|
||||
if (logical_replication_max_snap_files > lsns_num)
|
||||
cutoff = 0;
|
||||
/* have less files than cutoff */
|
||||
else
|
||||
|
||||
qsort(snapshot_descriptors, snapshot_index, sizeof(*snapshot_descriptors), SnapDescComparator);
|
||||
|
||||
/* Are there more snapshot files than specified? */
|
||||
if (logical_replication_max_snap_files <= snapshot_index)
|
||||
{
|
||||
cutoff = lsns[logical_replication_max_snap_files - 1];
|
||||
elog(LOG, "ls_monitor: dropping logical slots with restart_lsn lower %X/%X, found %d .snap files, limit is %d",
|
||||
LSN_FORMAT_ARGS(cutoff), lsns_num, logical_replication_max_snap_files);
|
||||
cutoff = snapshot_descriptors[logical_replication_max_snap_files - 1].lsn;
|
||||
elog(LOG,
|
||||
"ls_monitor: dropping logical slots with restart_lsn lower %X/%X, found %zu snapshot files, limit is %d",
|
||||
LSN_FORMAT_ARGS(cutoff), snapshot_index, logical_replication_max_snap_files);
|
||||
}
|
||||
pfree(lsns);
|
||||
|
||||
/* Is the size of the logical snapshots directory larger than specified?
|
||||
*
|
||||
* It's possible we could hit both thresholds, so remove any extra files
|
||||
* first, and then truncate based on size of the remaining files.
|
||||
*/
|
||||
if (logicalsnapdir_size > logical_replication_max_logicalsnapdir_size_bytes)
|
||||
{
|
||||
/* Unfortunately, iterating the directory does not guarantee any order
|
||||
* so we can't cache an index in the preceding loop.
|
||||
*/
|
||||
|
||||
off_t sz;
|
||||
const XLogRecPtr original = cutoff;
|
||||
|
||||
sz = snapshot_descriptors[0].sz;
|
||||
for (size_t i = 1; i < logical_replication_max_snap_files; ++i)
|
||||
{
|
||||
if (sz > logical_replication_max_logicalsnapdir_size_bytes)
|
||||
{
|
||||
cutoff = snapshot_descriptors[i - 1].lsn;
|
||||
break;
|
||||
}
|
||||
|
||||
sz += snapshot_descriptors[i].sz;
|
||||
}
|
||||
|
||||
if (cutoff != original)
|
||||
elog(LOG, "ls_monitor: dropping logical slots with restart_lsn lower than %X/%X, " SNAPDIR " is larger than %d KB",
|
||||
LSN_FORMAT_ARGS(cutoff), logical_replication_max_logicalsnapdir_size);
|
||||
}
|
||||
|
||||
pfree(snapshot_descriptors);
|
||||
FreeDir(dirdesc);
|
||||
|
||||
return cutoff;
|
||||
|
||||
#undef SNAPDIR
|
||||
}
|
||||
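The cutoff policy implemented above first caps the number of snapshot files, then, if the retained files still exceed the directory size budget, walks the LSN-descending list and tightens the cutoff where the running size crosses the budget. The same rule is easier to see without the PostgreSQL directory plumbing; the following Rust sketch is purely illustrative (the real code is the C above, the field names are assumptions, and the exact boundary handling differs slightly):

    struct SnapDesc {
        lsn: u64,
        size: u64,
    }

    /// `descs` must be sorted by LSN, newest first. Returns a restart_lsn cutoff (0 = keep everything).
    fn snapshots_cutoff_lsn(descs: &[SnapDesc], max_files: usize, max_total_size: u64) -> u64 {
        let mut cutoff = 0;
        let mut kept = descs.len();
        // Rule 1: more snapshot files than allowed, keep only the newest `max_files`.
        if max_files > 0 && descs.len() >= max_files {
            cutoff = descs[max_files - 1].lsn;
            kept = max_files;
        }
        // Rule 2: the remaining files still exceed the size budget, so tighten the
        // cutoff to the point where the running total crosses the budget.
        let mut total = 0;
        for (i, desc) in descs[..kept].iter().enumerate() {
            total += desc.size;
            if total > max_total_size {
                cutoff = descs[i].lsn;
                break;
            }
        }
        cutoff
    }
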
|
||||
void
|
||||
@@ -118,6 +189,16 @@ InitLogicalReplicationMonitor(void)
|
||||
0,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomIntVariable(
|
||||
"neon.logical_replication_max_logicalsnapdir_size",
|
||||
"Maximum allowed size of the pg_logical/snapshots directory (KB). When exceeded, slots are dropped until the limit is met. -1 disables the limit.",
|
||||
NULL,
|
||||
&logical_replication_max_logicalsnapdir_size,
|
||||
8000, -1, INT_MAX,
|
||||
PGC_SIGHUP,
|
||||
GUC_UNIT_KB,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
memset(&bgw, 0, sizeof(bgw));
|
||||
bgw.bgw_flags = BGWORKER_SHMEM_ACCESS;
|
||||
bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
|
||||
@@ -162,7 +243,7 @@ LogicalSlotsMonitorMain(Datum main_arg)
|
||||
* If there are too many .snap files, just drop all logical slots to
|
||||
* prevent aux files bloat.
|
||||
*/
|
||||
cutoff_lsn = get_num_snap_files_lsn_threshold();
|
||||
cutoff_lsn = get_snapshots_cutoff_lsn();
|
||||
if (cutoff_lsn > 0)
|
||||
{
|
||||
for (int i = 0; i < max_replication_slots; i++)
|
||||
|
||||
@@ -262,7 +262,7 @@ fn bench_wal_acceptor_throughput(c: &mut Criterion) {
|
||||
|
||||
// Send requests.
|
||||
for req in reqgen {
|
||||
_ = reply_rx.try_recv(); // discard any replies, to avoid blocking
|
||||
while reply_rx.try_recv().is_ok() {} // discard replies, to avoid blocking
|
||||
let msg = ProposerAcceptorMessage::AppendRequest(req);
|
||||
msg_tx.send(msg).await.expect("send failed");
|
||||
}
|
||||
|
||||
@@ -171,7 +171,6 @@ struct Args {
|
||||
#[arg(long)]
|
||||
walsenders_keep_horizon: bool,
|
||||
/// Controls how long backup will wait until uploading the partial segment.
|
||||
/// The actual waiting time is passed value + rand of it to add some jitter.
|
||||
#[arg(long, value_parser = humantime::parse_duration, default_value = DEFAULT_PARTIAL_BACKUP_TIMEOUT, verbatim_doc_comment)]
|
||||
partial_backup_timeout: Duration,
|
||||
/// Disable task to push messages to broker every second. Supposed to
|
||||
@@ -196,7 +195,6 @@ struct Args {
|
||||
/// if it weren't for `eviction_min_resident` preventing that.
|
||||
///
|
||||
/// Also defines interval for eviction retries.
|
||||
/// The actual waiting time is passed value + rand of it to add some jitter.
|
||||
#[arg(long, value_parser = humantime::parse_duration, default_value = DEFAULT_EVICTION_MIN_RESIDENT)]
|
||||
eviction_min_resident: Duration,
|
||||
}
|
||||
|
||||
@@ -55,7 +55,7 @@ pub static WRITE_WAL_SECONDS: Lazy<Histogram> = Lazy::new(|| {
|
||||
pub static FLUSH_WAL_SECONDS: Lazy<Histogram> = Lazy::new(|| {
|
||||
register_histogram!(
|
||||
"safekeeper_flush_wal_seconds",
|
||||
"Seconds spent syncing WAL to a disk",
|
||||
"Seconds spent syncing WAL to a disk (excluding segment initialization)",
|
||||
DISK_FSYNC_SECONDS_BUCKETS.to_vec()
|
||||
)
|
||||
.expect("Failed to register safekeeper_flush_wal_seconds histogram")
|
||||
@@ -217,7 +217,8 @@ pub static WAL_RECEIVER_QUEUE_DEPTH: Lazy<Histogram> = Lazy::new(|| {
|
||||
let mut buckets = pow2_buckets(1, MSG_QUEUE_SIZE);
|
||||
buckets.insert(0, 0.0);
|
||||
buckets.insert(buckets.len() - 1, (MSG_QUEUE_SIZE - 1) as f64);
|
||||
assert!(buckets.len() <= 12, "too many histogram buckets");
|
||||
// TODO: tweak this.
|
||||
assert!(buckets.len() <= 16, "too many histogram buckets");
|
||||
|
||||
register_histogram!(
|
||||
"safekeeper_wal_receiver_queue_depth",
|
||||
|
||||
@@ -7,14 +7,15 @@ use crate::metrics::{
|
||||
WAL_RECEIVERS, WAL_RECEIVER_QUEUE_DEPTH, WAL_RECEIVER_QUEUE_DEPTH_TOTAL,
|
||||
WAL_RECEIVER_QUEUE_SIZE_TOTAL,
|
||||
};
|
||||
use crate::safekeeper::AcceptorProposerMessage;
|
||||
use crate::safekeeper::ProposerAcceptorMessage;
|
||||
use crate::safekeeper::ServerInfo;
|
||||
use crate::safekeeper::{
|
||||
AcceptorProposerMessage, AppendRequest, AppendRequestHeader, ProposerAcceptorMessage,
|
||||
ServerInfo,
|
||||
};
|
||||
use crate::timeline::WalResidentTimeline;
|
||||
use crate::wal_service::ConnectionId;
|
||||
use crate::GlobalTimelines;
|
||||
use anyhow::{anyhow, Context};
|
||||
use bytes::BytesMut;
|
||||
use bytes::{BufMut as _, Bytes, BytesMut};
|
||||
use parking_lot::MappedMutexGuard;
|
||||
use parking_lot::Mutex;
|
||||
use parking_lot::MutexGuard;
|
||||
@@ -206,7 +207,8 @@ impl Drop for WalReceiverGuard {
|
||||
}
|
||||
}
|
||||
|
||||
pub const MSG_QUEUE_SIZE: usize = 256;
|
||||
// TODO: reconsider this.
|
||||
pub const MSG_QUEUE_SIZE: usize = 4096;
|
||||
pub const REPLY_QUEUE_SIZE: usize = 16;
|
||||
|
||||
impl SafekeeperPostgresHandler {
|
||||
@@ -484,6 +486,9 @@ const FLUSH_INTERVAL: Duration = Duration::from_secs(1);
|
||||
/// every 5 seconds, for 12 samples per poll. This will give a count of up to 12x active timelines.
|
||||
const METRICS_INTERVAL: Duration = Duration::from_secs(5);
|
||||
|
||||
/// The AppendRequest buffer size.
|
||||
const APPEND_BUFFER_SIZE: usize = 1024 * 1024;
|
||||
|
||||
/// Encapsulates a task which takes messages from msg_rx, processes and pushes
|
||||
/// replies to reply_tx.
|
||||
///
|
||||
@@ -530,6 +535,9 @@ impl WalAcceptor {
|
||||
async fn run(&mut self) -> anyhow::Result<()> {
|
||||
let walreceiver_guard = self.tli.get_walreceivers().register(self.conn_id);
|
||||
|
||||
// Buffer AppendRequests to submit them as a single large write.
|
||||
let mut append_buf = BufferedAppendRequest::new(APPEND_BUFFER_SIZE);
|
||||
|
||||
// Periodically flush the WAL and compute metrics.
|
||||
let mut flush_ticker = tokio::time::interval(FLUSH_INTERVAL);
|
||||
flush_ticker.set_missed_tick_behavior(MissedTickBehavior::Delay);
|
||||
@@ -546,7 +554,7 @@ impl WalAcceptor {
|
||||
// Process inbound message.
|
||||
msg = self.msg_rx.recv() => {
|
||||
// If disconnected, break to flush WAL and return.
|
||||
let Some(mut msg) = msg else {
|
||||
let Some(msg) = msg else {
|
||||
break;
|
||||
};
|
||||
|
||||
@@ -563,11 +571,44 @@ impl WalAcceptor {
|
||||
// This batches multiple appends per fsync. If the channel is empty after
|
||||
// sending the reply, we'll schedule an immediate flush.
|
||||
if let ProposerAcceptorMessage::AppendRequest(append_request) = msg {
|
||||
msg = ProposerAcceptorMessage::NoFlushAppendRequest(append_request);
|
||||
dirty = true;
|
||||
}
|
||||
// Try to batch multiple messages into a single large write.
|
||||
if !append_buf.is_empty() || !self.msg_rx.is_empty() {
|
||||
if append_buf.add(&append_request) {
|
||||
continue; // message buffered, go get next message
|
||||
}
|
||||
|
||||
self.tli.process_msg(&msg).await?
|
||||
// Full buffer, write it and buffer this message for next iteration.
|
||||
dirty = true;
|
||||
let buf_req = append_buf.take().expect("empty buffer");
|
||||
let buf_msg = ProposerAcceptorMessage::NoFlushAppendRequest(buf_req);
|
||||
let reply = self.tli.process_msg(&buf_msg).await?;
|
||||
drop(buf_msg); // allow reusing buffer for add
|
||||
assert!(append_buf.add(&append_request), "empty buffer rejected msg");
|
||||
reply
|
||||
} else {
|
||||
dirty = true;
|
||||
let msg = ProposerAcceptorMessage::NoFlushAppendRequest(append_request);
|
||||
self.tli.process_msg(&msg).await?
|
||||
}
|
||||
} else {
|
||||
self.tli.process_msg(&msg).await?
|
||||
}
|
||||
}
|
||||
|
||||
// If there are no pending messages, write the append buffer.
|
||||
//
|
||||
// NB: we don't also flush the WAL here. Otherwise we can get into a regime where we
|
||||
// quickly drain msg_rx and fsync before the sender is able to repopulate msg_rx.
|
||||
// This happens consistently due to Tokio scheduling, leading to overeager fsyncing.
|
||||
// Instead, we perform the write without fsyncing and give the sender a chance to
|
||||
// get scheduled and populate msg_rx for the next iteration. If there are no further
|
||||
// messages, the next iteration will flush the WAL.
|
||||
_ = future::ready(()), if self.msg_rx.is_empty() && !append_buf.is_empty() => {
|
||||
dirty = true;
|
||||
let buf_req = append_buf.take().expect("empty buffer");
|
||||
self.tli
|
||||
.process_msg(&ProposerAcceptorMessage::NoFlushAppendRequest(buf_req))
|
||||
.await?
|
||||
}
|
||||
|
||||
// While receiving AppendRequests, flush the WAL periodically and respond with an
|
||||
@@ -579,11 +620,11 @@ impl WalAcceptor {
|
||||
.await?
|
||||
}
|
||||
|
||||
// If there are no pending messages, flush the WAL immediately.
|
||||
// If there are no pending messages, flush the WAL and append buffer immediately.
|
||||
//
|
||||
// TODO: this should be done via flush_ticker.reset_immediately(), but that's always
|
||||
// delayed by 1ms due to this bug: https://github.com/tokio-rs/tokio/issues/6866.
|
||||
_ = future::ready(()), if dirty && self.msg_rx.is_empty() => {
|
||||
_ = future::ready(()), if self.msg_rx.is_empty() && dirty => {
|
||||
dirty = false;
|
||||
flush_ticker.reset();
|
||||
self.tli
|
||||
@@ -627,3 +668,115 @@ impl Drop for WalAcceptor {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Buffers WAL data for multiple AppendRequests, to submit them as a single write.
struct BufferedAppendRequest {
/// The buffer capacity.
capacity: usize,
/// The buffered header and WAL data.
buf: Option<(AppendRequestHeader, BytesMut)>,
/// A previous buffer that can be reused when the returned message is dropped.
reuse_buf: Option<Bytes>,
/// If an AppendRequest is larger than the buffer capacity (when empty), just stash it here to
/// avoid growing the buffer and copying it. This will be returned as-is.
large: Option<AppendRequest>,
}

impl BufferedAppendRequest {
|
||||
/// Creates a new append request buffer with the given capacity.
|
||||
fn new(capacity: usize) -> Self {
|
||||
Self {
|
||||
capacity,
|
||||
buf: None,
|
||||
reuse_buf: None,
|
||||
large: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds the given append request to the buffer, if possible. Returns `false` if the message
|
||||
/// can't be buffered, leaving self unmodified. An empty buffer will always accept a message.
|
||||
///
|
||||
/// If the buffer is not empty, the message must have the same term and proposer and contiguous
|
||||
/// `begin_lsn` and `end_lsn`. The buffer must have available capacity for the entire
|
||||
/// `wal_data`. If the message is greater than an empty buffer's capacity, it is accepted but
|
||||
/// simply stashed away in `large` without growing the buffer.
|
||||
pub fn add(&mut self, msg: &AppendRequest) -> bool {
|
||||
// If there is a stashed large message, reject further messages.
|
||||
if self.large.is_some() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// If there is no existing buffer, initialize one with the message.
|
||||
let Some((ref mut h, ref mut wal_data)) = self.buf else {
|
||||
// If the message is larger than the buffer capacity, just stash it instead of growing.
|
||||
if msg.wal_data.len() > self.capacity {
|
||||
assert!(self.large.is_none());
|
||||
self.large = Some(msg.clone()); // clone is cheap with Bytes
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reuse a previous buffer, if any, or allocate a new one.
|
||||
//
|
||||
// TODO: try_into_mut() is essentially runtime borrow checking. If AppendRequest used a
|
||||
// normal Vec<u8> we could do compile-time borrow checking instead and avoid panic.
|
||||
let mut wal_data = match self.reuse_buf.take() {
|
||||
Some(reuse_buf) => match reuse_buf.try_into_mut() {
|
||||
Ok(mut reuse_buf) => {
|
||||
assert_eq!(reuse_buf.capacity(), self.capacity);
|
||||
reuse_buf.clear();
|
||||
reuse_buf
|
||||
}
|
||||
Err(_) => panic!("couldn't reuse buffer, still in use"),
|
||||
},
|
||||
None => BytesMut::with_capacity(self.capacity),
|
||||
};
|
||||
// Copy the append request into the buffer.
|
||||
wal_data.put_slice(&msg.wal_data);
|
||||
self.buf = Some((msg.h, wal_data));
|
||||
return true;
|
||||
};
|
||||
|
||||
// The messages must have the same term and proposer.
|
||||
if h.term != msg.h.term || h.proposer_uuid != msg.h.proposer_uuid {
|
||||
return false;
|
||||
}
|
||||
// The messages must be contiguous.
|
||||
if h.end_lsn != msg.h.begin_lsn {
|
||||
return false;
|
||||
}
|
||||
// The message must fit in the buffer.
|
||||
if wal_data.len() + msg.wal_data.len() > self.capacity {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Add the message to the buffer, bumping the commit and truncate LSNs. We assume that later
|
||||
// messages have later commit/truncate LSNs.
|
||||
h.end_lsn = msg.h.end_lsn;
|
||||
h.commit_lsn = msg.h.commit_lsn;
|
||||
h.truncate_lsn = msg.h.truncate_lsn;
|
||||
wal_data.put_slice(&msg.wal_data);
|
||||
true
|
||||
}
|
||||
|
||||
/// Returns true if there is no buffered message.
|
||||
fn is_empty(&self) -> bool {
|
||||
self.buf.is_none() && self.large.is_none()
|
||||
}
|
||||
|
||||
/// Takes the buffered AppendRequest (if any), leaving a None in its place.
|
||||
///
|
||||
/// NB: The returned `wal_data` Bytes must be dropped before the next call to `add()`, in order
|
||||
/// to reuse the buffer. This is basically runtime borrow checking, because of Bytes.
|
||||
fn take(&mut self) -> Option<AppendRequest> {
|
||||
// If there is a stashed large message, return it.
|
||||
if let Some(large) = self.large.take() {
|
||||
assert!(self.buf.is_none(), "both buf and large are set");
|
||||
return Some(large);
|
||||
}
|
||||
|
||||
let (h, wal_data) = self.buf.take()?;
|
||||
let wal_data = wal_data.freeze();
|
||||
self.reuse_buf = Some(wal_data.clone()); // keep a reference to the buffer
|
||||
Some(AppendRequest { h, wal_data })
|
||||
}
|
||||
}
|
||||
|
||||
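The conditions under which `add()` accepts a second request into a non-empty buffer reduce to one predicate: same term, same proposer, contiguous LSNs, and the combined payload fits the capacity. A standalone restatement of that rule (the tuple fields stand in for AppendRequestHeader fields and payload lengths; this is not the actual API):

    /// Batching rule used by the append buffer: two requests can be merged when they share the
    /// same term and proposer, are LSN-contiguous, and the combined payload fits the capacity.
    fn can_merge(
        (term_a, prop_a, end_lsn_a, len_a): (u64, [u8; 16], u64, usize),
        (term_b, prop_b, begin_lsn_b, len_b): (u64, [u8; 16], u64, usize),
        capacity: usize,
    ) -> bool {
        term_a == term_b && prop_a == prop_b && end_lsn_a == begin_lsn_b && len_a + len_b <= capacity
    }
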
@@ -296,12 +296,13 @@ pub struct ProposerElected {
|
||||
|
||||
/// Request with WAL message sent from proposer to safekeeper. Along the way it
|
||||
/// communicates commit_lsn.
|
||||
#[derive(Debug)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct AppendRequest {
|
||||
pub h: AppendRequestHeader,
|
||||
pub wal_data: Bytes,
|
||||
}
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
|
||||
#[derive(Debug, Clone, Copy, Deserialize)]
|
||||
pub struct AppendRequestHeader {
|
||||
// safekeeper's current term; if it is higher than proposer's, the compute is out of date.
|
||||
pub term: Term,
|
||||
@@ -979,7 +980,8 @@ where
|
||||
self.wal_store.flush_wal().await?;
|
||||
}
|
||||
|
||||
// Update commit_lsn.
|
||||
// Update commit_lsn. It will be flushed to the control file regularly by the timeline
|
||||
// manager, off of the WAL ingest hot path.
|
||||
if msg.h.commit_lsn != Lsn(0) {
|
||||
self.update_commit_lsn(msg.h.commit_lsn).await?;
|
||||
}
|
||||
@@ -992,15 +994,6 @@ where
|
||||
self.state.inmem.peer_horizon_lsn =
|
||||
max(self.state.inmem.peer_horizon_lsn, msg.h.truncate_lsn);
|
||||
|
||||
// Update truncate and commit LSN in control file.
|
||||
// To avoid negative impact on performance of extra fsync, do it only
|
||||
// when commit_lsn delta exceeds WAL segment size.
|
||||
if self.state.commit_lsn + (self.state.server.wal_seg_size as u64)
|
||||
< self.state.inmem.commit_lsn
|
||||
{
|
||||
self.state.flush().await?;
|
||||
}
|
||||
|
||||
trace!(
|
||||
"processed AppendRequest of len {}, begin_lsn={}, end_lsn={:?}, commit_lsn={:?}, truncate_lsn={:?}, flushed={:?}",
|
||||
msg.wal_data.len(),
|
||||
@@ -1174,7 +1167,7 @@ mod tests {
|
||||
proposer_uuid: [0; 16],
|
||||
};
|
||||
let mut append_request = AppendRequest {
|
||||
h: ar_hdr.clone(),
|
||||
h: ar_hdr,
|
||||
wal_data: Bytes::from_static(b"b"),
|
||||
};
|
||||
|
||||
@@ -1248,7 +1241,7 @@ mod tests {
|
||||
proposer_uuid: [0; 16],
|
||||
};
|
||||
let append_request = AppendRequest {
|
||||
h: ar_hdr.clone(),
|
||||
h: ar_hdr,
|
||||
wal_data: Bytes::from_static(b"b"),
|
||||
};
|
||||
|
||||
@@ -1256,7 +1249,7 @@ mod tests {
|
||||
sk.process_msg(&ProposerAcceptorMessage::AppendRequest(append_request))
|
||||
.await
|
||||
.unwrap();
|
||||
let mut ar_hrd2 = ar_hdr.clone();
|
||||
let mut ar_hrd2 = ar_hdr;
|
||||
ar_hrd2.begin_lsn = Lsn(4);
|
||||
ar_hrd2.end_lsn = Lsn(5);
|
||||
let append_request = AppendRequest {
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
use std::{cmp::max, ops::Deref};
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use postgres_ffi::WAL_SEGMENT_SIZE;
|
||||
use safekeeper_api::models::TimelineTermBumpResponse;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utils::{
|
||||
@@ -144,7 +145,7 @@ impl TimelinePersistentState {
|
||||
ServerInfo {
|
||||
pg_version: 170000, /* Postgres server version (major * 10000) */
|
||||
system_id: 0, /* Postgres system identifier */
|
||||
wal_seg_size: 16 * 1024 * 1024,
|
||||
wal_seg_size: WAL_SEGMENT_SIZE as u32,
|
||||
},
|
||||
vec![],
|
||||
Lsn::INVALID,
|
||||
|
||||
@@ -18,6 +18,7 @@ use crate::{
|
||||
metrics::{
|
||||
EvictionEvent, EVICTION_EVENTS_COMPLETED, EVICTION_EVENTS_STARTED, NUM_EVICTED_TIMELINES,
|
||||
},
|
||||
rate_limit::rand_duration,
|
||||
timeline_manager::{Manager, StateSnapshot},
|
||||
wal_backup,
|
||||
wal_backup_partial::{self, PartialRemoteSegment},
|
||||
@@ -130,7 +131,8 @@ impl Manager {
|
||||
return;
|
||||
}
|
||||
|
||||
self.update_evict_not_before();
|
||||
self.evict_not_before =
|
||||
tokio::time::Instant::now() + rand_duration(&self.conf.eviction_min_resident);
|
||||
|
||||
info!("successfully restored evicted timeline");
|
||||
NUM_EVICTED_TIMELINES.dec();
|
||||
|
||||
@@ -315,13 +315,15 @@ pub async fn main_task(
|
||||
mgr.set_status(Status::EvictTimeline);
|
||||
if !mgr.evict_timeline().await {
|
||||
// eviction failed, try again later
|
||||
mgr.update_evict_not_before();
|
||||
mgr.evict_not_before =
|
||||
Instant::now() + rand_duration(&mgr.conf.eviction_min_resident);
|
||||
update_next_event(&mut next_event, mgr.evict_not_before);
|
||||
}
|
||||
}
|
||||
None => {
|
||||
// we can't evict timeline now, will try again later
|
||||
mgr.update_evict_not_before();
|
||||
mgr.evict_not_before =
|
||||
Instant::now() + rand_duration(&mgr.conf.eviction_min_resident);
|
||||
update_next_event(&mut next_event, mgr.evict_not_before);
|
||||
}
|
||||
}
|
||||
@@ -428,9 +430,7 @@ impl Manager {
|
||||
tli,
|
||||
global_rate_limiter,
|
||||
// to smooth out evictions spike after restart
|
||||
evict_not_before: Instant::now()
|
||||
+ conf.eviction_min_resident
|
||||
+ rand_duration(&conf.eviction_min_resident),
|
||||
evict_not_before: Instant::now() + rand_duration(&conf.eviction_min_resident),
|
||||
conf,
|
||||
}
|
||||
}
|
||||
@@ -515,7 +515,12 @@ impl Manager {
|
||||
return;
|
||||
}
|
||||
|
||||
if state.cfile_last_persist_at.elapsed() > self.conf.control_file_save_interval {
|
||||
if state.cfile_last_persist_at.elapsed() > self.conf.control_file_save_interval
|
||||
// If the control file's commit_lsn lags more than one segment behind the current
|
||||
// commit_lsn, flush immediately to limit recovery time in case of a crash. We don't do
|
||||
// this on the WAL ingest hot path since it incurs fsync latency.
|
||||
|| state.commit_lsn.saturating_sub(state.cfile_commit_lsn).0 >= self.wal_seg_size as u64
|
||||
{
|
||||
let mut write_guard = self.tli.write_shared_state().await;
|
||||
// it should be done in the background because it blocks manager task, but flush() should
|
||||
// be fast enough not to be a problem now
|
||||
@@ -720,14 +725,6 @@ impl Manager {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn update_evict_not_before(&mut self) {
// Wait at least eviction_min_resident and add randomized value of it to
// add jitter.
self.evict_not_before = Instant::now()
+ self.conf.eviction_min_resident
+ rand_duration(&self.conf.eviction_min_resident)
}
}

// utility functions
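A sketch of the jittered back-off that `update_evict_not_before` computes, in Python for illustration only (names are assumptions, not the Rust API): wait at least `eviction_min_resident`, then add a random extra of up to the same duration so that evictions do not all fire at the same instant after a restart.

    import random
    import time

    # Illustrative sketch of the jitter scheme, assuming a seconds-based config value.
    def next_evict_not_before(eviction_min_resident_s: float) -> float:
        jitter = random.uniform(0.0, eviction_min_resident_s)
        return time.monotonic() + eviction_min_resident_s + jitter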
@@ -426,12 +426,12 @@ pub async fn main_task(
cancel: CancellationToken,
) -> Option<PartialRemoteSegment> {
debug!("started");
let await_duration = conf.partial_backup_timeout;
let mut first_iteration = true;

let mut commit_lsn_rx = tli.get_commit_lsn_watch_rx();
let mut flush_lsn_rx = tli.get_term_flush_lsn_watch_rx();

let partial_backup_timeout = conf.partial_backup_timeout;
let mut backup = PartialBackup::new(tli, conf).await;

debug!("state: {:?}", backup.state);

@@ -489,9 +489,9 @@ pub async fn main_task(
// if this is not the first iteration, we will wait for the full await_duration
let await_duration = if first_iteration {
first_iteration = false;
partial_backup_timeout + rand_duration(&partial_backup_timeout)
rand_duration(&await_duration)
} else {
partial_backup_timeout
await_duration
};

// fixing the segno and waiting some time to prevent reuploading the same segment too often
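A hedged sketch of the first-iteration randomization the hunk above discusses, mirroring the `rand_duration(&await_duration)` variant (Python, names assumed; the diff shows two variants and does not spell out which side is added): spreading the first wait over a random fraction of the partial-backup timeout keeps many timelines from uploading their partial segments in lockstep right after startup, while later iterations wait the full timeout.

    import random

    # Illustrative only: randomized first wait, full timeout afterwards.
    def partial_backup_wait(timeout_s: float, first_iteration: bool) -> float:
        return random.uniform(0.0, timeout_s) if first_iteration else timeout_s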
@@ -257,6 +257,9 @@ impl PhysicalStorage {
// Try to open existing partial file
Ok((file, true))
} else {
let _timer = WAL_STORAGE_OPERATION_SECONDS
.with_label_values(&["initialize_segment"])
.start_timer();
// Create and fill new partial file
//
// We're using fdatasync during WAL writing, so file size must not

@@ -274,8 +277,6 @@ impl PhysicalStorage {
});
file.set_len(self.wal_seg_size as u64).await?;

// Note: this doesn't get into observe_flush_seconds metric. But
// segment init should be separate metric, if any.
if let Err(e) = durable_rename(&tmp_path, &wal_file_partial_path, !self.no_sync).await {
// Probably rename succeeded, but fsync of it failed. Remove
// the file then to avoid using it.
@@ -46,3 +46,8 @@ class EndpointHttpClient(requests.Session):
)
res.raise_for_status()
return res.json()

def metrics(self) -> str:
res = self.get(f"http://localhost:{self.port}/metrics")
res.raise_for_status()
return res.text
@@ -286,7 +286,7 @@ class PgProtocol:
return self.safe_psql_many([query], **kwargs)[0]

def safe_psql_many(
self, queries: Iterable[str], log_query=True, **kwargs: Any
self, queries: Iterable[str], log_query: bool = True, **kwargs: Any
) -> list[list[tuple[Any, ...]]]:
"""
Execute queries against the node and return all rows.

@@ -306,7 +306,7 @@ class PgProtocol:
result.append(cur.fetchall())
return result

def safe_psql_scalar(self, query, log_query=True) -> Any:
def safe_psql_scalar(self, query: str, log_query: bool = True) -> Any:
"""
Execute query returning single row with single column.
"""
@@ -1065,6 +1065,9 @@ class NeonEnv:
"http_auth_type": http_auth_type,
# Default which can be overriden with `NeonEnvBuilder.pageserver_config_override`
"availability_zone": "us-east-2a",
# Disable pageserver disk syncs in tests: when running tests concurrently, this avoids
# the pageserver taking a long time to start up due to syncfs flushing other tests' data
"no_sync": True,
}
if self.pageserver_virtual_file_io_engine is not None:
ps_cfg["virtual_file_io_engine"] = self.pageserver_virtual_file_io_engine
@@ -1,10 +1,8 @@
from __future__ import annotations

import enum
import os
from typing import TYPE_CHECKING

import pytest
from typing_extensions import override

if TYPE_CHECKING:

@@ -18,12 +16,15 @@ This fixture is used to determine which version of Postgres to use for tests.

# Inherit PgVersion from str rather than int to make it easier to pass as a command-line argument
# TODO: use enum.StrEnum for Python >= 3.11
@enum.unique
class PgVersion(str, enum.Enum):
V14 = "14"
V15 = "15"
V16 = "16"
V17 = "17"

# Default Postgres Version for tests that don't really depend on Postgres itself
DEFAULT = V16

# Instead of making version an optional parameter in methods, we can use this fake entry
# to explicitly rely on the default server version (could be different from pg_version fixture value)
NOT_SET = "<-POSTRGRES VERSION IS NOT SET->"

@@ -59,27 +60,3 @@ class PgVersion(str, enum.Enum):
# Make mypy happy
# See https://github.com/python/mypy/issues/3974
return None


DEFAULT_VERSION: PgVersion = PgVersion.V16


def skip_on_postgres(version: PgVersion, reason: str):
return pytest.mark.skipif(
PgVersion(os.environ.get("DEFAULT_PG_VERSION", DEFAULT_VERSION)) is version,
reason=reason,
)


def xfail_on_postgres(version: PgVersion, reason: str):
return pytest.mark.xfail(
PgVersion(os.environ.get("DEFAULT_PG_VERSION", DEFAULT_VERSION)) is version,
reason=reason,
)


def run_only_on_default_postgres(reason: str):
return pytest.mark.skipif(
PgVersion(os.environ.get("DEFAULT_PG_VERSION", DEFAULT_VERSION)) is not DEFAULT_VERSION,
reason=reason,
)
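A small usage sketch of the `PgVersion` string enum shown above (the import path matches what other hunks in this diff use; the rest is an illustrative assumption): because the enum inherits from `str`, a value read from the environment or a command-line flag maps directly onto a member.

    import os

    from fixtures.pg_version import PgVersion

    # "16" from an env var or CLI argument resolves to the V16 member.
    assert PgVersion("16") is PgVersion.V16

    version = PgVersion(os.environ.get("DEFAULT_PG_VERSION", PgVersion.DEFAULT))
    print(f"running against Postgres {version.value}")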
@@ -25,6 +25,7 @@ from fixtures.pageserver.common_types import (
parse_delta_layer,
parse_image_layer,
)
from fixtures.pg_version import PgVersion

if TYPE_CHECKING:
from collections.abc import Iterable

@@ -37,6 +38,7 @@ if TYPE_CHECKING:

Fn = TypeVar("Fn", bound=Callable[..., Any])

COMPONENT_BINARIES = {
"storage_controller": ("storage_controller",),
"storage_broker": ("storage_broker",),

@@ -519,7 +521,7 @@ def assert_pageserver_backups_equal(left: Path, right: Path, skip_files: set[str
This is essentially:

lines=$(comm -3 \
<(mkdir left && cd left && tar xf "$left" && find . -type f -print0 | xargs sha256sum | sort -k2) \
<(mkdir left && cd left && tar xf "$left" && find . -type f -print0 | xargs sha256sum | sort -k2) \
<(mkdir right && cd right && tar xf "$right" && find . -type f -print0 | xargs sha256sum | sort -k2) \
| wc -l)
[ "$lines" = "0" ]
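A rough Python equivalent of the shell pipeline quoted in the docstring above, for readers who prefer not to parse the `comm -3` incantation. This is a sketch, not the fixture's real implementation: hash every file under two already-extracted backup directories and require the same set of (digest, relative path) pairs.

    import hashlib
    from pathlib import Path

    def backup_digests(root: Path) -> set[tuple[str, str]]:
        # (sha256 hex digest, path relative to the extracted backup root)
        return {
            (hashlib.sha256(p.read_bytes()).hexdigest(), str(p.relative_to(root)))
            for p in root.rglob("*")
            if p.is_file()
        }

    def backups_equal(left_dir: Path, right_dir: Path) -> bool:
        return backup_digests(left_dir) == backup_digests(right_dir)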
@@ -643,3 +645,40 @@ def allpairs_versions():
)
ids.append(f"combination_{''.join(cur_id)}")
return {"argnames": "combination", "argvalues": tuple(argvalues), "ids": ids}


def skip_on_postgres(version: PgVersion, reason: str):
return pytest.mark.skipif(
PgVersion(os.getenv("DEFAULT_PG_VERSION", PgVersion.DEFAULT)) is version,
reason=reason,
)


def xfail_on_postgres(version: PgVersion, reason: str):
return pytest.mark.xfail(
PgVersion(os.getenv("DEFAULT_PG_VERSION", PgVersion.DEFAULT)) is version,
reason=reason,
)


def run_only_on_default_postgres(reason: str):
return pytest.mark.skipif(
PgVersion(os.getenv("DEFAULT_PG_VERSION", PgVersion.DEFAULT)) is not PgVersion.DEFAULT,
reason=reason,
)


def skip_in_debug_build(reason: str):
return pytest.mark.skipif(
os.getenv("BUILD_TYPE", "debug") == "debug",
reason=reason,
)


def skip_on_ci(reason: str):
# `CI` variable is always set to `true` on GitHub
# Ref: https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#default-environment-variables
return pytest.mark.skipif(
os.getenv("CI", "false") == "true",
reason=reason,
)
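Minimal usage sketch for the helpers added above (the decorator names and import path come straight from this diff; the test itself is hypothetical): they wrap `pytest.mark.skipif` so environment-dependent skips read as a single line at the test site, which is exactly how the later hunks in this comparison rewrite the performance tests.

    from fixtures.utils import skip_in_debug_build, skip_on_ci

    @skip_on_ci("needs dedicated hardware, not CI runners")
    @skip_in_debug_build("too slow in debug builds")
    def test_heavy_ingest_benchmark():
        ...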
@@ -1,7 +1,6 @@
from __future__ import annotations

import json
import os
from pathlib import Path
from typing import TYPE_CHECKING

@@ -14,7 +13,7 @@ from fixtures.neon_fixtures import (
PgBin,
wait_for_last_flush_lsn,
)
from fixtures.utils import get_scale_for_db, humantime_to_ms
from fixtures.utils import get_scale_for_db, humantime_to_ms, skip_on_ci

from performance.pageserver.util import (
setup_pageserver_with_tenants,

@@ -38,9 +37,8 @@ if TYPE_CHECKING:
@pytest.mark.parametrize("pgbench_scale", [get_scale_for_db(200)])
@pytest.mark.parametrize("n_tenants", [500])
@pytest.mark.timeout(10000)
@pytest.mark.skipif(
os.getenv("CI", "false") == "true",
reason="This test needs lot of resources and should run on dedicated HW, not in github action runners as part of CI",
@skip_on_ci(
"This test needs lot of resources and should run on dedicated HW, not in github action runners as part of CI"
)
def test_pageserver_characterize_throughput_with_n_tenants(
neon_env_builder: NeonEnvBuilder,

@@ -66,9 +64,8 @@ def test_pageserver_characterize_throughput_with_n_tenants(
@pytest.mark.parametrize("n_clients", [1, 64])
@pytest.mark.parametrize("n_tenants", [1])
@pytest.mark.timeout(2400)
@pytest.mark.skipif(
os.getenv("CI", "false") == "true",
reason="This test needs lot of resources and should run on dedicated HW, not in github action runners as part of CI",
@skip_on_ci(
"This test needs lot of resources and should run on dedicated HW, not in github action runners as part of CI"
)
def test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant(
neon_env_builder: NeonEnvBuilder,
@@ -149,12 +149,16 @@ def test_subscriber_lag(
check_pgbench_still_running(pub_workload, "pub")
check_pgbench_still_running(sub_workload, "sub")

with (
psycopg2.connect(pub_connstr) as pub_conn,
psycopg2.connect(sub_connstr) as sub_conn,
):
with pub_conn.cursor() as pub_cur, sub_conn.cursor() as sub_cur:
lag = measure_logical_replication_lag(sub_cur, pub_cur)
pub_conn = psycopg2.connect(pub_connstr)
sub_conn = psycopg2.connect(sub_connstr)
pub_conn.autocommit = True
sub_conn.autocommit = True

with pub_conn.cursor() as pub_cur, sub_conn.cursor() as sub_cur:
lag = measure_logical_replication_lag(sub_cur, pub_cur)

pub_conn.close()
sub_conn.close()

log.info(f"Replica lagged behind master by {lag} seconds")
zenbenchmark.record("replica_lag", lag, "s", MetricReport.LOWER_IS_BETTER)

@@ -206,6 +210,7 @@ def test_publisher_restart(
sub_conn = psycopg2.connect(sub_connstr)
pub_conn.autocommit = True
sub_conn.autocommit = True

with pub_conn.cursor() as pub_cur, sub_conn.cursor() as sub_cur:
pub_cur.execute("SELECT 1 FROM pg_catalog.pg_publication WHERE pubname = 'pub1'")
pub_exists = len(pub_cur.fetchall()) != 0

@@ -222,6 +227,7 @@ def test_publisher_restart(
sub_cur.execute(f"create subscription sub1 connection '{pub_connstr}' publication pub1")

initial_sync_lag = measure_logical_replication_lag(sub_cur, pub_cur)

pub_conn.close()
sub_conn.close()

@@ -248,12 +254,17 @@ def test_publisher_restart(
["pgbench", "-c10", pgbench_duration, "-Mprepared"],
env=pub_env,
)
with (
psycopg2.connect(pub_connstr) as pub_conn,
psycopg2.connect(sub_connstr) as sub_conn,
):
with pub_conn.cursor() as pub_cur, sub_conn.cursor() as sub_cur:
lag = measure_logical_replication_lag(sub_cur, pub_cur)

pub_conn = psycopg2.connect(pub_connstr)
sub_conn = psycopg2.connect(sub_connstr)
pub_conn.autocommit = True
sub_conn.autocommit = True

with pub_conn.cursor() as pub_cur, sub_conn.cursor() as sub_cur:
lag = measure_logical_replication_lag(sub_cur, pub_cur)

pub_conn.close()
sub_conn.close()

log.info(f"Replica lagged behind master by {lag} seconds")
zenbenchmark.record("replica_lag", lag, "s", MetricReport.LOWER_IS_BETTER)
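The hunks above trade `with psycopg2.connect(...)` blocks for explicit connect / autocommit / close. A hedged note on the rationale, which is my reading rather than something the diff states: psycopg2's connection context manager wraps a transaction and does not close the connection on exit, so an explicit connection with `autocommit = True` avoids leaving an idle transaction open while the benchmark measures lag. A sketch of the pattern, with a made-up helper name:

    import psycopg2

    def run_one_query(connstr: str, query: str):
        # Explicit connection: autocommit, then close when done.
        conn = psycopg2.connect(connstr)
        conn.autocommit = True
        try:
            with conn.cursor() as cur:
                cur.execute(query)
                return cur.fetchall()
        finally:
            conn.close()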
@@ -288,58 +299,56 @@ def test_snap_files(
|
||||
env = benchmark_project_pub.pgbench_env
|
||||
connstr = benchmark_project_pub.connstr
|
||||
|
||||
with psycopg2.connect(connstr) as conn:
|
||||
conn.autocommit = True
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("SELECT rolsuper FROM pg_roles WHERE rolname = 'neondb_owner'")
|
||||
is_super = cast("bool", cur.fetchall()[0][0])
|
||||
assert is_super, "This benchmark won't work if we don't have superuser"
|
||||
conn = psycopg2.connect(connstr)
|
||||
conn.autocommit = True
|
||||
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("SELECT rolsuper FROM pg_roles WHERE rolname = 'neondb_owner'")
|
||||
is_super = cast("bool", cur.fetchall()[0][0])
|
||||
assert is_super, "This benchmark won't work if we don't have superuser"
|
||||
|
||||
conn.close()
|
||||
|
||||
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s100"], env=env)
|
||||
|
||||
conn = psycopg2.connect(connstr)
|
||||
conn.autocommit = True
|
||||
cur = conn.cursor()
|
||||
cur.execute("ALTER SYSTEM SET neon.logical_replication_max_snap_files = -1")
|
||||
|
||||
with psycopg2.connect(connstr) as conn:
|
||||
conn.autocommit = True
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("SELECT pg_reload_conf()")
|
||||
|
||||
with psycopg2.connect(connstr) as conn:
|
||||
conn.autocommit = True
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"""
|
||||
DO $$
|
||||
BEGIN
|
||||
IF EXISTS (
|
||||
SELECT 1
|
||||
FROM pg_replication_slots
|
||||
WHERE slot_name = 'slotter'
|
||||
) THEN
|
||||
PERFORM pg_drop_replication_slot('slotter');
|
||||
END IF;
|
||||
END $$;
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"""
|
||||
)
|
||||
cur.execute("SELECT pg_create_logical_replication_slot('slotter', 'test_decoding')")
|
||||
DO $$
|
||||
BEGIN
|
||||
IF EXISTS (
|
||||
SELECT 1
|
||||
FROM pg_replication_slots
|
||||
WHERE slot_name = 'slotter'
|
||||
) THEN
|
||||
PERFORM pg_drop_replication_slot('slotter');
|
||||
END IF;
|
||||
END $$;
|
||||
"""
|
||||
)
|
||||
cur.execute("SELECT pg_create_logical_replication_slot('slotter', 'test_decoding')")
|
||||
|
||||
conn.close()
|
||||
|
||||
workload = pg_bin.run_nonblocking(["pgbench", "-c10", pgbench_duration, "-Mprepared"], env=env)
|
||||
try:
|
||||
start = time.time()
|
||||
prev_measurement = time.time()
|
||||
while time.time() - start < test_duration_min * 60:
|
||||
with psycopg2.connect(connstr) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"SELECT count(*) FROM (SELECT pg_log_standby_snapshot() FROM generate_series(1, 10000) g) s"
|
||||
)
|
||||
check_pgbench_still_running(workload)
|
||||
cur.execute(
|
||||
"SELECT pg_replication_slot_advance('slotter', pg_current_wal_lsn())"
|
||||
)
|
||||
conn = psycopg2.connect(connstr)
|
||||
conn.autocommit = True
|
||||
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"SELECT count(*) FROM (SELECT pg_log_standby_snapshot() FROM generate_series(1, 10000) g) s"
|
||||
)
|
||||
check_pgbench_still_running(workload)
|
||||
cur.execute("SELECT pg_replication_slot_advance('slotter', pg_current_wal_lsn())")
|
||||
|
||||
conn.close()
|
||||
|
||||
# Measure storage
|
||||
if time.time() - prev_measurement > test_interval_min * 60:
|
||||
|
||||
@@ -102,15 +102,21 @@ def test_ro_replica_lag(
|
||||
check_pgbench_still_running(master_workload)
|
||||
check_pgbench_still_running(replica_workload)
|
||||
time.sleep(sync_interval_min * 60)
|
||||
|
||||
conn_master = psycopg2.connect(master_connstr)
|
||||
conn_replica = psycopg2.connect(replica_connstr)
|
||||
conn_master.autocommit = True
|
||||
conn_replica.autocommit = True
|
||||
|
||||
with (
|
||||
psycopg2.connect(master_connstr) as conn_master,
|
||||
psycopg2.connect(replica_connstr) as conn_replica,
|
||||
conn_master.cursor() as cur_master,
|
||||
conn_replica.cursor() as cur_replica,
|
||||
):
|
||||
with (
|
||||
conn_master.cursor() as cur_master,
|
||||
conn_replica.cursor() as cur_replica,
|
||||
):
|
||||
lag = measure_replication_lag(cur_master, cur_replica)
|
||||
lag = measure_replication_lag(cur_master, cur_replica)
|
||||
|
||||
conn_master.close()
|
||||
conn_replica.close()
|
||||
|
||||
log.info(f"Replica lagged behind master by {lag} seconds")
|
||||
zenbenchmark.record("replica_lag", lag, "s", MetricReport.LOWER_IS_BETTER)
|
||||
finally:
|
||||
@@ -219,11 +225,15 @@ def test_replication_start_stop(
|
||||
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s10"], env=master_env)
|
||||
|
||||
# Sync replicas
|
||||
with psycopg2.connect(master_connstr) as conn_master:
|
||||
with conn_master.cursor() as cur_master:
|
||||
for i in range(num_replicas):
|
||||
conn_replica = psycopg2.connect(replica_connstr[i])
|
||||
measure_replication_lag(cur_master, conn_replica.cursor())
|
||||
conn_master = psycopg2.connect(master_connstr)
|
||||
conn_master.autocommit = True
|
||||
|
||||
with conn_master.cursor() as cur_master:
|
||||
for i in range(num_replicas):
|
||||
conn_replica = psycopg2.connect(replica_connstr[i])
|
||||
measure_replication_lag(cur_master, conn_replica.cursor())
|
||||
|
||||
conn_master.close()
|
||||
|
||||
master_pgbench = pg_bin.run_nonblocking(
|
||||
[
|
||||
@@ -277,17 +287,22 @@ def test_replication_start_stop(
|
||||
|
||||
time.sleep(configuration_test_time_sec)
|
||||
|
||||
with psycopg2.connect(master_connstr) as conn_master:
|
||||
with conn_master.cursor() as cur_master:
|
||||
for ireplica in range(num_replicas):
|
||||
replica_conn = psycopg2.connect(replica_connstr[ireplica])
|
||||
lag = measure_replication_lag(cur_master, replica_conn.cursor())
|
||||
zenbenchmark.record(
|
||||
f"Replica {ireplica} lag", lag, "s", MetricReport.LOWER_IS_BETTER
|
||||
)
|
||||
log.info(
|
||||
f"Replica {ireplica} lagging behind master by {lag} seconds after configuration {iconfig:>b}"
|
||||
)
|
||||
conn_master = psycopg2.connect(master_connstr)
|
||||
conn_master.autocommit = True
|
||||
|
||||
with conn_master.cursor() as cur_master:
|
||||
for ireplica in range(num_replicas):
|
||||
replica_conn = psycopg2.connect(replica_connstr[ireplica])
|
||||
lag = measure_replication_lag(cur_master, replica_conn.cursor())
|
||||
zenbenchmark.record(
|
||||
f"Replica {ireplica} lag", lag, "s", MetricReport.LOWER_IS_BETTER
|
||||
)
|
||||
log.info(
|
||||
f"Replica {ireplica} lagging behind master by {lag} seconds after configuration {iconfig:>b}"
|
||||
)
|
||||
|
||||
conn_master.close()
|
||||
|
||||
master_pgbench.terminate()
|
||||
except Exception as e:
|
||||
error_occurred = True
|
||||
|
||||
@@ -8,7 +8,7 @@ from fixtures.common_types import Lsn, TimelineId
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.pageserver.http import TimelineCreate406
|
||||
from fixtures.utils import query_scalar
|
||||
from fixtures.utils import query_scalar, skip_in_debug_build
|
||||
|
||||
|
||||
# Test the GC implementation when running with branching.
|
||||
@@ -48,10 +48,8 @@ from fixtures.utils import query_scalar
|
||||
# Because the delta layer D covering lsn1 is corrupted, creating a branch
|
||||
# starting from lsn1 should return an error as follows:
|
||||
# could not find data for key ... at LSN ..., for request at LSN ...
|
||||
def test_branch_and_gc(neon_simple_env: NeonEnv, build_type: str):
|
||||
if build_type == "debug":
|
||||
pytest.skip("times out in debug builds")
|
||||
|
||||
@skip_in_debug_build("times out in debug builds")
|
||||
def test_branch_and_gc(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
pageserver_http_client = env.pageserver.http_client()
|
||||
|
||||
|
||||
@@ -2,7 +2,6 @@ from __future__ import annotations
|
||||
|
||||
import enum
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
@@ -13,7 +12,7 @@ from fixtures.neon_fixtures import (
|
||||
generate_uploads_and_deletions,
|
||||
)
|
||||
from fixtures.pageserver.http import PageserverApiException
|
||||
from fixtures.utils import wait_until
|
||||
from fixtures.utils import skip_in_debug_build, wait_until
|
||||
from fixtures.workload import Workload
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -32,7 +31,7 @@ AGGRESIVE_COMPACTION_TENANT_CONF = {
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.skipif(os.environ.get("BUILD_TYPE") == "debug", reason="only run with release build")
|
||||
@skip_in_debug_build("only run with release build")
|
||||
def test_pageserver_compaction_smoke(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
This is a smoke test that compaction kicks in. The workload repeatedly churns
|
||||
|
||||
@@ -12,6 +12,7 @@ from fixtures.neon_fixtures import (
|
||||
NeonEnvBuilder,
|
||||
)
|
||||
from fixtures.pg_version import PgVersion
|
||||
from fixtures.utils import skip_on_postgres
|
||||
from pytest_httpserver import HTTPServer
|
||||
from werkzeug.wrappers.request import Request
|
||||
from werkzeug.wrappers.response import Response
|
||||
@@ -41,17 +42,14 @@ def neon_env_builder_local(
|
||||
return neon_env_builder
|
||||
|
||||
|
||||
@skip_on_postgres(PgVersion.V16, reason="TODO: PG16 extension building")
|
||||
@skip_on_postgres(PgVersion.V17, reason="TODO: PG17 extension building")
|
||||
def test_remote_extensions(
|
||||
httpserver: HTTPServer,
|
||||
neon_env_builder_local: NeonEnvBuilder,
|
||||
httpserver_listen_address,
|
||||
pg_version,
|
||||
):
|
||||
if pg_version == PgVersion.V16:
|
||||
pytest.skip("TODO: PG16 extension building")
|
||||
if pg_version == PgVersion.V17:
|
||||
pytest.skip("TODO: PG17 extension building")
|
||||
|
||||
# setup mock http server
|
||||
# that expects request for anon.tar.zst
|
||||
# and returns the requested file
|
||||
|
||||
@@ -4,25 +4,22 @@ from collections.abc import Iterable
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder, wait_for_last_flush_lsn
|
||||
from fixtures.pageserver.http import HistoricLayerInfo, LayerMapInfo
|
||||
from fixtures.utils import human_bytes
|
||||
from fixtures.utils import human_bytes, skip_in_debug_build
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing import Union
|
||||
|
||||
|
||||
def test_ingesting_large_batches_of_images(neon_env_builder: NeonEnvBuilder, build_type: str):
|
||||
@skip_in_debug_build("debug run is unnecessarily slow")
|
||||
def test_ingesting_large_batches_of_images(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
Build a non-small GIN index which includes similarly batched up images in WAL stream as does pgvector
|
||||
to show that we no longer create oversized layers.
|
||||
"""
|
||||
|
||||
if build_type == "debug":
|
||||
pytest.skip("debug run is unnecessarily slow")
|
||||
|
||||
minimum_initdb_size = 20 * 1024**2
|
||||
checkpoint_distance = 32 * 1024**2
|
||||
minimum_good_layer_size = checkpoint_distance * 0.9
|
||||
|
||||
@@ -1,6 +1,14 @@
|
||||
from logging import info
|
||||
from __future__ import annotations
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
import time
|
||||
from logging import info
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.metrics import parse_metrics
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
|
||||
|
||||
def test_installed_extensions(neon_simple_env: NeonEnv):
|
||||
@@ -85,3 +93,52 @@ def test_installed_extensions(neon_simple_env: NeonEnv):
|
||||
assert ext["n_databases"] == 2
|
||||
ext["versions"].sort()
|
||||
assert ext["versions"] == ["1.2", "1.3"]
|
||||
|
||||
# check that /metrics endpoint is available
|
||||
# ensure that we see the metric before and after restart
|
||||
res = client.metrics()
|
||||
info("Metrics: %s", res)
|
||||
m = parse_metrics(res)
|
||||
neon_m = m.query_all("installed_extensions", {"extension_name": "neon", "version": "1.2"})
|
||||
assert len(neon_m) == 1
|
||||
for sample in neon_m:
|
||||
assert sample.value == 2
|
||||
neon_m = m.query_all("installed_extensions", {"extension_name": "neon", "version": "1.3"})
|
||||
assert len(neon_m) == 1
|
||||
for sample in neon_m:
|
||||
assert sample.value == 1
|
||||
|
||||
endpoint.stop()
|
||||
endpoint.start()
|
||||
|
||||
timeout = 10
|
||||
while timeout > 0:
|
||||
try:
|
||||
res = client.metrics()
|
||||
timeout = -1
|
||||
if len(parse_metrics(res).query_all("installed_extensions")) < 4:
|
||||
# Assume that not all metrics that are collected yet
|
||||
time.sleep(1)
|
||||
timeout -= 1
|
||||
continue
|
||||
except Exception:
|
||||
log.exception("failed to get metrics, assume they are not collected yet")
|
||||
time.sleep(1)
|
||||
timeout -= 1
|
||||
continue
|
||||
|
||||
assert (
|
||||
len(parse_metrics(res).query_all("installed_extensions")) >= 4
|
||||
), "Not all metrics are collected"
|
||||
|
||||
info("After restart metrics: %s", res)
|
||||
m = parse_metrics(res)
|
||||
neon_m = m.query_all("installed_extensions", {"extension_name": "neon", "version": "1.2"})
|
||||
assert len(neon_m) == 1
|
||||
for sample in neon_m:
|
||||
assert sample.value == 1
|
||||
|
||||
neon_m = m.query_all("installed_extensions", {"extension_name": "neon", "version": "1.3"})
|
||||
assert len(neon_m) == 1
|
||||
for sample in neon_m:
|
||||
assert sample.value == 1
|
||||
|
||||
@@ -2,7 +2,6 @@ from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import (
|
||||
NeonEnvBuilder,
|
||||
@@ -10,12 +9,18 @@ from fixtures.neon_fixtures import (
|
||||
wait_for_last_flush_lsn,
|
||||
)
|
||||
from fixtures.pg_version import PgVersion
|
||||
from fixtures.utils import skip_on_postgres
|
||||
|
||||
|
||||
@skip_on_postgres(
|
||||
PgVersion.V14,
|
||||
reason="pg_log_standby_snapshot() function is available since Postgres 16",
|
||||
)
|
||||
@skip_on_postgres(
|
||||
PgVersion.V15,
|
||||
reason="pg_log_standby_snapshot() function is available since Postgres 16",
|
||||
)
|
||||
def test_layer_bloating(neon_env_builder: NeonEnvBuilder, vanilla_pg):
|
||||
if neon_env_builder.pg_version != PgVersion.V16:
|
||||
pytest.skip("pg_log_standby_snapshot() function is available only in PG16")
|
||||
|
||||
env = neon_env_builder.init_start(
|
||||
initial_tenant_conf={
|
||||
"gc_period": "0s",
|
||||
|
||||
@@ -2,7 +2,6 @@ from __future__ import annotations
|
||||
|
||||
import time
|
||||
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import (
|
||||
NeonEnvBuilder,
|
||||
@@ -12,17 +11,13 @@ from fixtures.neon_fixtures import (
|
||||
from fixtures.pageserver.common_types import parse_layer_file_name
|
||||
from fixtures.pageserver.utils import wait_for_upload
|
||||
from fixtures.remote_storage import RemoteStorageKind
|
||||
from fixtures.utils import skip_in_debug_build
|
||||
|
||||
|
||||
# Crates a few layers, ensures that we can evict them (removing locally but keeping track of them anyway)
|
||||
# and then download them back.
|
||||
def test_basic_eviction(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
build_type: str,
|
||||
):
|
||||
if build_type == "debug":
|
||||
pytest.skip("times out in debug builds")
|
||||
|
||||
@skip_in_debug_build("times out in debug builds")
|
||||
def test_basic_eviction(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
|
||||
|
||||
env = neon_env_builder.init_start(
|
||||
|
||||
@@ -5,8 +5,7 @@ import uuid
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
from fixtures.pg_version import run_only_on_default_postgres
|
||||
from fixtures.utils import wait_until
|
||||
from fixtures.utils import run_only_on_default_postgres, wait_until
|
||||
|
||||
|
||||
@pytest.mark.parametrize("level", ["trace", "debug", "info", "warn", "error"])
|
||||
|
||||
@@ -4,24 +4,31 @@ import time
|
||||
from functools import partial
|
||||
from random import choice
|
||||
from string import ascii_lowercase
|
||||
from typing import TYPE_CHECKING, cast
|
||||
|
||||
from fixtures.common_types import Lsn
|
||||
from fixtures.common_types import Lsn, TenantId, TimelineId
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import (
|
||||
NeonEnv,
|
||||
NeonEnvBuilder,
|
||||
PgProtocol,
|
||||
logical_replication_sync,
|
||||
wait_for_last_flush_lsn,
|
||||
)
|
||||
from fixtures.utils import wait_until
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from fixtures.neon_fixtures import (
|
||||
Endpoint,
|
||||
NeonEnv,
|
||||
NeonEnvBuilder,
|
||||
PgProtocol,
|
||||
VanillaPostgres,
|
||||
)
|
||||
|
||||
|
||||
def random_string(n: int):
|
||||
return "".join([choice(ascii_lowercase) for _ in range(n)])
|
||||
|
||||
|
||||
def test_logical_replication(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
def test_logical_replication(neon_simple_env: NeonEnv, vanilla_pg: VanillaPostgres):
|
||||
env = neon_simple_env
|
||||
|
||||
tenant_id = env.initial_tenant
|
||||
@@ -160,10 +167,10 @@ COMMIT;
|
||||
|
||||
|
||||
# Test that neon.logical_replication_max_snap_files works
|
||||
def test_obsolete_slot_drop(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
def slot_removed(ep):
|
||||
def test_obsolete_slot_drop(neon_simple_env: NeonEnv, vanilla_pg: VanillaPostgres):
|
||||
def slot_removed(ep: Endpoint):
|
||||
assert (
|
||||
endpoint.safe_psql(
|
||||
ep.safe_psql(
|
||||
"select count(*) from pg_replication_slots where slot_name = 'stale_slot'"
|
||||
)[0][0]
|
||||
== 0
|
||||
@@ -254,7 +261,7 @@ FROM generate_series(1, 16384) AS seq; -- Inserts enough rows to exceed 16MB of
|
||||
|
||||
|
||||
# Tests that walsender correctly blocks until WAL is downloaded from safekeepers
|
||||
def test_lr_with_slow_safekeeper(neon_env_builder: NeonEnvBuilder, vanilla_pg):
|
||||
def test_lr_with_slow_safekeeper(neon_env_builder: NeonEnvBuilder, vanilla_pg: VanillaPostgres):
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
@@ -336,13 +343,13 @@ FROM generate_series(1, 16384) AS seq; -- Inserts enough rows to exceed 16MB of
|
||||
#
|
||||
# Most pages start with a contrecord, so we don't do anything special
|
||||
# to ensure that.
|
||||
def test_restart_endpoint(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
def test_restart_endpoint(neon_simple_env: NeonEnv, vanilla_pg: VanillaPostgres):
|
||||
env = neon_simple_env
|
||||
|
||||
env.create_branch("init")
|
||||
endpoint = env.endpoints.create_start("init")
|
||||
tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0]
|
||||
timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]
|
||||
tenant_id = TenantId(cast("str", endpoint.safe_psql("show neon.tenant_id")[0][0]))
|
||||
timeline_id = TimelineId(cast("str", endpoint.safe_psql("show neon.timeline_id")[0][0]))
|
||||
|
||||
cur = endpoint.connect().cursor()
|
||||
cur.execute("create table t(key int, value text)")
|
||||
@@ -380,7 +387,7 @@ def test_restart_endpoint(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
# logical replication bug as such, but without logical replication,
|
||||
# records passed ot the WAL redo process are never large enough to hit
|
||||
# the bug.
|
||||
def test_large_records(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
def test_large_records(neon_simple_env: NeonEnv, vanilla_pg: VanillaPostgres):
|
||||
env = neon_simple_env
|
||||
|
||||
env.create_branch("init")
|
||||
@@ -522,15 +529,20 @@ def logical_replication_wait_flush_lsn_sync(publisher: PgProtocol) -> Lsn:
|
||||
because for some WAL records like vacuum subscriber won't get any data at
|
||||
all.
|
||||
"""
|
||||
publisher_flush_lsn = Lsn(publisher.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
|
||||
publisher_flush_lsn = Lsn(
|
||||
cast("str", publisher.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
|
||||
)
|
||||
|
||||
def check_caughtup():
|
||||
res = publisher.safe_psql(
|
||||
"""
|
||||
res = cast(
|
||||
"tuple[str, str, str]",
|
||||
publisher.safe_psql(
|
||||
"""
|
||||
select sent_lsn, flush_lsn, pg_current_wal_flush_lsn() from pg_stat_replication sr, pg_replication_slots s
|
||||
where s.active_pid = sr.pid and s.slot_type = 'logical';
|
||||
"""
|
||||
)[0]
|
||||
)[0],
|
||||
)
|
||||
sent_lsn, flush_lsn, curr_publisher_flush_lsn = Lsn(res[0]), Lsn(res[1]), Lsn(res[2])
|
||||
log.info(
|
||||
f"sent_lsn={sent_lsn}, flush_lsn={flush_lsn}, publisher_flush_lsn={curr_publisher_flush_lsn}, waiting flush_lsn to reach {publisher_flush_lsn}"
|
||||
@@ -545,7 +557,7 @@ select sent_lsn, flush_lsn, pg_current_wal_flush_lsn() from pg_stat_replication
|
||||
# flush_lsn reporting to publisher. Without this, subscriber may ack too far,
|
||||
# losing data on restart because publisher implicitly advances positition given
|
||||
# in START_REPLICATION to the confirmed_flush_lsn of the slot.
|
||||
def test_subscriber_synchronous_commit(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
def test_subscriber_synchronous_commit(neon_simple_env: NeonEnv, vanilla_pg: VanillaPostgres):
|
||||
env = neon_simple_env
|
||||
# use vanilla as publisher to allow writes on it when safekeeper is down
|
||||
vanilla_pg.configure(
|
||||
@@ -593,7 +605,7 @@ def test_subscriber_synchronous_commit(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
# logical_replication_wait_flush_lsn_sync is expected to hang while
|
||||
# safekeeper is down.
|
||||
vanilla_pg.safe_psql("checkpoint;")
|
||||
assert sub.safe_psql_scalar("SELECT count(*) FROM t") == 1000
|
||||
assert cast("int", sub.safe_psql_scalar("SELECT count(*) FROM t")) == 1000
|
||||
|
||||
# restart subscriber and ensure it can catch up lost tail again
|
||||
sub.stop(mode="immediate")
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import cast
|
||||
@@ -15,7 +14,7 @@ from fixtures.neon_fixtures import (
|
||||
parse_project_git_version_output,
|
||||
)
|
||||
from fixtures.pageserver.http import PageserverHttpClient
|
||||
from fixtures.pg_version import PgVersion, skip_on_postgres
|
||||
from fixtures.utils import run_only_on_default_postgres, skip_in_debug_build
|
||||
|
||||
|
||||
def helper_compare_timeline_list(
|
||||
@@ -195,10 +194,8 @@ def test_cli_start_stop_multi(neon_env_builder: NeonEnvBuilder):
|
||||
res.check_returncode()
|
||||
|
||||
|
||||
@skip_on_postgres(PgVersion.V14, reason="does not use postgres")
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("BUILD_TYPE") == "debug", reason="unit test for test support, either build works"
|
||||
)
|
||||
@run_only_on_default_postgres(reason="does not use postgres")
|
||||
@skip_in_debug_build("unit test for test support, either build works")
|
||||
def test_parse_project_git_version_output_positive():
|
||||
commit = "b6f77b5816cf1dba12a3bc8747941182ce220846"
|
||||
|
||||
@@ -217,10 +214,8 @@ def test_parse_project_git_version_output_positive():
|
||||
assert parse_project_git_version_output(example) == commit
|
||||
|
||||
|
||||
@skip_on_postgres(PgVersion.V14, reason="does not use postgres")
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("BUILD_TYPE") == "debug", reason="unit test for test support, either build works"
|
||||
)
|
||||
@run_only_on_default_postgres(reason="does not use postgres")
|
||||
@skip_in_debug_build("unit test for test support, either build works")
|
||||
def test_parse_project_git_version_output_local_docker():
|
||||
"""
|
||||
Makes sure the tests don't accept the default version in Dockerfile one gets without providing
|
||||
@@ -234,10 +229,8 @@ def test_parse_project_git_version_output_local_docker():
|
||||
assert input in str(e)
|
||||
|
||||
|
||||
@skip_on_postgres(PgVersion.V14, reason="does not use postgres")
|
||||
@pytest.mark.skipif(
|
||||
os.environ.get("BUILD_TYPE") == "debug", reason="cli api sanity, either build works"
|
||||
)
|
||||
@run_only_on_default_postgres(reason="does not use postgres")
|
||||
@skip_in_debug_build("unit test for test support, either build works")
|
||||
def test_binaries_version_parses(neon_binpath: Path):
|
||||
"""
|
||||
Ensures that we can parse the actual outputs of --version from a set of binaries.
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import time
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
@@ -16,7 +15,7 @@ from fixtures.neon_fixtures import (
|
||||
)
|
||||
from fixtures.pageserver.http import PageserverHttpClient
|
||||
from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload
|
||||
from fixtures.utils import wait_until
|
||||
from fixtures.utils import skip_in_debug_build, wait_until
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing import Optional
|
||||
@@ -227,12 +226,9 @@ def test_idle_checkpoints(neon_env_builder: NeonEnvBuilder):
|
||||
assert get_dirty_bytes(env) >= dirty_after_write
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
# We have to use at least ~100MB of data to hit the lowest limit we can configure, which is
|
||||
# prohibitively slow in debug mode
|
||||
os.getenv("BUILD_TYPE") == "debug",
|
||||
reason="Avoid running bulkier ingest tests in debug mode",
|
||||
)
|
||||
# We have to use at least ~100MB of data to hit the lowest limit we can configure, which is
|
||||
# prohibitively slow in debug mode
|
||||
@skip_in_debug_build("Avoid running bulkier ingest tests in debug mode")
|
||||
def test_total_size_limit(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
Test that checkpoints are done based on total ephemeral layer size, even if no one timeline is
|
||||
|
||||
@@ -8,7 +8,7 @@ import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
from fixtures.remote_storage import s3_storage
|
||||
from fixtures.utils import wait_until
|
||||
from fixtures.utils import skip_in_debug_build, wait_until
|
||||
|
||||
|
||||
# Test restarting page server, while safekeeper and compute node keep
|
||||
@@ -155,12 +155,8 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):
|
||||
# safekeeper and compute node keep running.
|
||||
@pytest.mark.timeout(540)
|
||||
@pytest.mark.parametrize("shard_count", [None, 4])
|
||||
def test_pageserver_chaos(
|
||||
neon_env_builder: NeonEnvBuilder, build_type: str, shard_count: Optional[int]
|
||||
):
|
||||
if build_type == "debug":
|
||||
pytest.skip("times out in debug builds")
|
||||
|
||||
@skip_in_debug_build("times out in debug builds")
|
||||
def test_pageserver_chaos(neon_env_builder: NeonEnvBuilder, shard_count: Optional[int]):
|
||||
# same rationale as with the immediate stop; we might leave orphan layers behind.
|
||||
neon_env_builder.disable_scrub_on_exit()
|
||||
neon_env_builder.enable_pageserver_remote_storage(s3_storage())
|
||||
|
||||
@@ -17,7 +17,7 @@ from fixtures.pageserver.utils import (
|
||||
wait_for_upload_queue_empty,
|
||||
)
|
||||
from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind, S3Storage, s3_storage
|
||||
from fixtures.utils import wait_until
|
||||
from fixtures.utils import skip_in_debug_build, wait_until
|
||||
from fixtures.workload import Workload
|
||||
from werkzeug.wrappers.request import Request
|
||||
from werkzeug.wrappers.response import Response
|
||||
@@ -765,7 +765,7 @@ def test_secondary_background_downloads(neon_env_builder: NeonEnvBuilder):
|
||||
assert download_rate < expect_download_rate * 2
|
||||
|
||||
|
||||
@pytest.mark.skipif(os.environ.get("BUILD_TYPE") == "debug", reason="only run with release build")
|
||||
@skip_in_debug_build("only run with release build")
|
||||
@pytest.mark.parametrize("via_controller", [True, False])
|
||||
def test_slow_secondary_downloads(neon_env_builder: NeonEnvBuilder, via_controller: bool):
|
||||
"""
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
#
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, cast
|
||||
@@ -19,6 +18,7 @@ from fixtures.neon_fixtures import (
|
||||
)
|
||||
from fixtures.pg_version import PgVersion
|
||||
from fixtures.remote_storage import s3_storage
|
||||
from fixtures.utils import skip_in_debug_build
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing import Optional
|
||||
@@ -329,7 +329,7 @@ def test_sql_regress(
|
||||
post_checks(env, test_output_dir, DBNAME, endpoint)
|
||||
|
||||
|
||||
@pytest.mark.skipif(os.environ.get("BUILD_TYPE") == "debug", reason="only run with release build")
|
||||
@skip_in_debug_build("only run with release build")
|
||||
def test_tx_abort_with_many_relations(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
):
|
||||
|
||||
@@ -5,7 +5,8 @@ import time
|
||||
from fixtures.neon_fixtures import NeonEnv, logical_replication_sync
|
||||
|
||||
|
||||
def test_physical_and_logical_replication(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
def test_physical_and_logical_replication_slot_not_copied(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
"""Test read replica of a primary which has a logical replication publication"""
|
||||
env = neon_simple_env
|
||||
|
||||
n_records = 100000
|
||||
@@ -13,7 +14,6 @@ def test_physical_and_logical_replication(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
primary = env.endpoints.create_start(
|
||||
branch_name="main",
|
||||
endpoint_id="primary",
|
||||
config_lines=["min_wal_size=32MB", "max_wal_size=64MB"],
|
||||
)
|
||||
p_con = primary.connect()
|
||||
p_cur = p_con.cursor()
|
||||
@@ -30,7 +30,6 @@ def test_physical_and_logical_replication(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
secondary = env.endpoints.new_replica_start(
|
||||
origin=primary,
|
||||
endpoint_id="secondary",
|
||||
config_lines=["min_wal_size=32MB", "max_wal_size=64MB"],
|
||||
)
|
||||
|
||||
s_con = secondary.connect()
|
||||
@@ -48,3 +47,51 @@ def test_physical_and_logical_replication(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
# Check that LR slot is not copied to replica
|
||||
s_cur.execute("select count(*) from pg_replication_slots")
|
||||
assert s_cur.fetchall()[0][0] == 0
|
||||
|
||||
|
||||
def test_aux_not_logged_at_replica(neon_simple_env: NeonEnv, vanilla_pg):
|
||||
"""Test that AUX files are not saved at replica"""
|
||||
env = neon_simple_env
|
||||
|
||||
n_records = 20000
|
||||
|
||||
primary = env.endpoints.create_start(
|
||||
branch_name="main",
|
||||
endpoint_id="primary",
|
||||
)
|
||||
p_con = primary.connect()
|
||||
p_cur = p_con.cursor()
|
||||
p_cur.execute("CREATE TABLE t(pk bigint primary key, payload text default repeat('?',200))")
|
||||
p_cur.execute("create publication pub1 for table t")
|
||||
|
||||
# start subscriber
|
||||
vanilla_pg.start()
|
||||
vanilla_pg.safe_psql("CREATE TABLE t(pk bigint primary key, payload text)")
|
||||
connstr = primary.connstr().replace("'", "''")
|
||||
vanilla_pg.safe_psql(f"create subscription sub1 connection '{connstr}' publication pub1")
|
||||
|
||||
for pk in range(n_records):
|
||||
p_cur.execute("insert into t (pk) values (%s)", (pk,))
|
||||
|
||||
# LR snapshot is stored each 15 seconds
|
||||
time.sleep(16)
|
||||
|
||||
# start replica
|
||||
secondary = env.endpoints.new_replica_start(
|
||||
origin=primary,
|
||||
endpoint_id="secondary",
|
||||
)
|
||||
|
||||
s_con = secondary.connect()
|
||||
s_cur = s_con.cursor()
|
||||
|
||||
logical_replication_sync(vanilla_pg, primary)
|
||||
|
||||
assert vanilla_pg.safe_psql("select count(*) from t")[0][0] == n_records
|
||||
s_cur.execute("select count(*) from t")
|
||||
assert s_cur.fetchall()[0][0] == n_records
|
||||
|
||||
vanilla_pg.stop()
|
||||
secondary.stop()
|
||||
primary.stop()
|
||||
assert not secondary.log_contains("cannot make new WAL entries during recovery")
|
||||
|
||||
@@ -30,7 +30,7 @@ import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnv, wait_for_last_flush_lsn, wait_replica_caughtup
|
||||
from fixtures.pg_version import PgVersion
|
||||
from fixtures.utils import query_scalar, wait_until
|
||||
from fixtures.utils import query_scalar, skip_on_postgres, wait_until
|
||||
|
||||
CREATE_SUBXACTS_FUNC = """
|
||||
create or replace function create_subxacts(n integer) returns void as $$
|
||||
@@ -137,6 +137,12 @@ def test_replica_start_scan_clog_crashed_xids(neon_simple_env: NeonEnv):
|
||||
assert secondary_cur.fetchone() == (1,)
|
||||
|
||||
|
||||
@skip_on_postgres(
|
||||
PgVersion.V14, reason="pg_log_standby_snapshot() function is available since Postgres 16"
|
||||
)
|
||||
@skip_on_postgres(
|
||||
PgVersion.V15, reason="pg_log_standby_snapshot() function is available since Postgres 16"
|
||||
)
|
||||
def test_replica_start_at_running_xacts(neon_simple_env: NeonEnv, pg_version):
|
||||
"""
|
||||
Test that starting a replica works right after the primary has
|
||||
@@ -149,9 +155,6 @@ def test_replica_start_at_running_xacts(neon_simple_env: NeonEnv, pg_version):
|
||||
"""
|
||||
env = neon_simple_env
|
||||
|
||||
if env.pg_version == PgVersion.V14 or env.pg_version == PgVersion.V15:
|
||||
pytest.skip("pg_log_standby_snapshot() function is available only in PG16")
|
||||
|
||||
primary = env.endpoints.create_start(branch_name="main", endpoint_id="primary")
|
||||
primary_conn = primary.connect()
|
||||
primary_cur = primary_conn.cursor()
|
||||
|
||||
@@ -20,7 +20,7 @@ from fixtures.neon_fixtures import (
|
||||
)
|
||||
from fixtures.pageserver.utils import assert_prefix_empty, assert_prefix_not_empty
|
||||
from fixtures.remote_storage import s3_storage
|
||||
from fixtures.utils import wait_until
|
||||
from fixtures.utils import skip_in_debug_build, wait_until
|
||||
from fixtures.workload import Workload
|
||||
from pytest_httpserver import HTTPServer
|
||||
from typing_extensions import override
|
||||
@@ -853,12 +853,9 @@ def test_sharding_split_stripe_size(
|
||||
wait_until(10, 1, assert_restart_notification)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
# The quantity of data isn't huge, but debug can be _very_ slow, and the things we're
|
||||
# validating in this test don't benefit much from debug assertions.
|
||||
os.getenv("BUILD_TYPE") == "debug",
|
||||
reason="Avoid running bulkier ingest tests in debug mode",
|
||||
)
|
||||
# The quantity of data isn't huge, but debug can be _very_ slow, and the things we're
|
||||
# validating in this test don't benefit much from debug assertions.
|
||||
@skip_in_debug_build("Avoid running bulkier ingest tests in debug mode")
|
||||
def test_sharding_ingest_layer_sizes(
|
||||
neon_env_builder: NeonEnvBuilder,
|
||||
):
|
||||
|
||||
@@ -36,11 +36,12 @@ from fixtures.pageserver.utils import (
|
||||
remote_storage_delete_key,
|
||||
timeline_delete_wait_completed,
|
||||
)
|
||||
from fixtures.pg_version import PgVersion, run_only_on_default_postgres
|
||||
from fixtures.pg_version import PgVersion
|
||||
from fixtures.port_distributor import PortDistributor
|
||||
from fixtures.remote_storage import RemoteStorageKind, s3_storage
|
||||
from fixtures.storage_controller_proxy import StorageControllerProxy
|
||||
from fixtures.utils import (
|
||||
run_only_on_default_postgres,
|
||||
run_pg_bench_small,
|
||||
subprocess_capture,
|
||||
wait_until,
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from pathlib import Path
|
||||
|
||||
@@ -21,7 +20,7 @@ from fixtures.pageserver.utils import (
|
||||
wait_until_tenant_active,
|
||||
)
|
||||
from fixtures.pg_version import PgVersion
|
||||
from fixtures.utils import wait_until
|
||||
from fixtures.utils import skip_in_debug_build, wait_until
|
||||
|
||||
|
||||
def test_empty_tenant_size(neon_env_builder: NeonEnvBuilder):
|
||||
@@ -279,7 +278,7 @@ def test_only_heads_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Pa
|
||||
size_debug_file.write(size_debug)
|
||||
|
||||
|
||||
@pytest.mark.skipif(os.environ.get("BUILD_TYPE") == "debug", reason="only run with release build")
|
||||
@skip_in_debug_build("only run with release build")
|
||||
def test_single_branch_get_tenant_size_grows(
|
||||
neon_env_builder: NeonEnvBuilder, test_output_dir: Path, pg_version: PgVersion
|
||||
):
|
||||
|
||||
@@ -427,7 +427,7 @@ def test_create_churn_during_restart(neon_env_builder: NeonEnvBuilder):
|
||||
env.pageserver.start()
|
||||
|
||||
for f in futs:
|
||||
f.result(timeout=10)
|
||||
f.result(timeout=30)
|
||||
|
||||
# The tenant should end up active
|
||||
wait_until_tenant_active(env.pageserver.http_client(), tenant_id, iterations=10, period=1)
|
||||
|
||||
@@ -1,15 +1,22 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
from fixtures.common_types import TenantId, TimelineArchivalState, TimelineId
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import (
|
||||
NeonEnvBuilder,
|
||||
last_flush_lsn_upload,
|
||||
)
|
||||
from fixtures.pageserver.http import PageserverApiException
|
||||
from fixtures.pageserver.utils import assert_prefix_empty, assert_prefix_not_empty
|
||||
from fixtures.remote_storage import s3_storage
|
||||
from fixtures.pageserver.utils import assert_prefix_empty, assert_prefix_not_empty, list_prefix
|
||||
from fixtures.remote_storage import S3Storage, s3_storage
|
||||
from fixtures.utils import wait_until
|
||||
from mypy_boto3_s3.type_defs import (
|
||||
ObjectTypeDef,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("shard_count", [0, 4])
|
||||
@@ -369,3 +376,146 @@ def test_timeline_offload_persist(neon_env_builder: NeonEnvBuilder, delete_timel
|
||||
neon_env_builder.pageserver_remote_storage,
|
||||
prefix=f"tenants/{str(tenant_id)}/",
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("offload_child", ["offload", "offload-corrupt", "archive", None])
|
||||
def test_timeline_retain_lsn(neon_env_builder: NeonEnvBuilder, offload_child: Optional[str]):
|
||||
"""
|
||||
Ensure that retain_lsn functionality for timelines works, both for offloaded and non-offloaded ones
|
||||
"""
|
||||
if offload_child == "offload-corrupt":
|
||||
# Our corruption code only works with S3 compatible storage
|
||||
neon_env_builder.enable_pageserver_remote_storage(s3_storage())
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
ps_http = env.pageserver.http_client()
|
||||
|
||||
# Turn off gc and compaction loops: we want to issue them manually for better reliability
|
||||
tenant_id, root_timeline_id = env.create_tenant(
|
||||
conf={
|
||||
# small checkpointing and compaction targets to ensure we generate many upload operations
|
||||
"checkpoint_distance": 128 * 1024,
|
||||
"compaction_threshold": 1,
|
||||
"compaction_target_size": 128 * 1024,
|
||||
# set small image creation thresholds so that gc deletes data
|
||||
"image_creation_threshold": 2,
|
||||
# disable background compaction and GC. We invoke it manually when we want it to happen.
|
||||
"gc_period": "0s",
|
||||
"compaction_period": "0s",
|
||||
# Disable pitr, we only want the latest lsn
|
||||
"pitr_interval": "0s",
|
||||
# Don't rely on endpoint lsn leases
|
||||
"lsn_lease_length": "0s",
|
||||
}
|
||||
)
|
||||
|
||||
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
|
||||
endpoint.safe_psql_many(
|
||||
[
|
||||
"CREATE TABLE foo(v int, key serial primary key, t text default 'data_content')",
|
||||
"SELECT setseed(0.4321)",
|
||||
"INSERT INTO foo SELECT v FROM (SELECT generate_series(1,2048), (random() * 409600)::int as v) as random_numbers",
|
||||
]
|
||||
)
|
||||
pre_branch_sum = endpoint.safe_psql("SELECT sum(key) from foo where v < 51200")
|
||||
log.info(f"Pre branch sum: {pre_branch_sum}")
|
||||
last_flush_lsn_upload(env, endpoint, tenant_id, root_timeline_id)
|
||||
|
||||
# Create a branch and write some additional data to the parent
|
||||
child_timeline_id = env.create_branch("test_archived_branch", tenant_id)
|
||||
|
||||
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
|
||||
# Do some churn of the data. This is important so that we can overwrite image layers.
|
||||
for i in range(10):
|
||||
endpoint.safe_psql_many(
|
||||
[
|
||||
f"SELECT setseed(0.23{i})",
|
||||
"UPDATE foo SET v=(random() * 409600)::int WHERE v % 3 = 2",
|
||||
"UPDATE foo SET v=(random() * 409600)::int WHERE v % 3 = 1",
|
||||
"UPDATE foo SET v=(random() * 409600)::int WHERE v % 3 = 0",
|
||||
]
|
||||
)
|
||||
post_branch_sum = endpoint.safe_psql("SELECT sum(key) from foo where v < 51200")
|
||||
log.info(f"Post branch sum: {post_branch_sum}")
|
||||
last_flush_lsn_upload(env, endpoint, tenant_id, root_timeline_id)
|
||||
|
||||
if offload_child is not None:
|
||||
ps_http.timeline_archival_config(
|
||||
tenant_id,
|
||||
child_timeline_id,
|
||||
state=TimelineArchivalState.ARCHIVED,
|
||||
)
|
||||
leaf_detail = ps_http.timeline_detail(
|
||||
tenant_id,
|
||||
child_timeline_id,
|
||||
)
|
||||
assert leaf_detail["is_archived"] is True
|
||||
if "offload" in offload_child:
|
||||
ps_http.timeline_offload(tenant_id, child_timeline_id)
|
||||
|
||||
# Do a restart to get rid of any in-memory objects (we only init gc info once, at attach)
|
||||
env.pageserver.stop()
|
||||
if offload_child == "offload-corrupt":
|
||||
assert isinstance(env.pageserver_remote_storage, S3Storage)
|
||||
listing = list_prefix(
|
||||
env.pageserver_remote_storage, f"tenants/{str(tenant_id)}/tenant-manifest"
|
||||
)
|
||||
objects: list[ObjectTypeDef] = listing.get("Contents", [])
|
||||
assert len(objects) > 0
|
||||
remote_key: str = str(objects[0].get("Key", []))
|
||||
local_path = str(env.repo_dir / "tenant-manifest.json")
|
||||
|
||||
log.info(f"Downloading {remote_key} -> {local_path}")
|
||||
env.pageserver_remote_storage.client.download_file(
|
||||
env.pageserver_remote_storage.bucket_name, remote_key, local_path
|
||||
)
|
||||
|
||||
log.info(f"Corrupting {local_path}")
|
||||
with open(local_path) as manifest_json_file:
|
||||
manifest_json = json.load(manifest_json_file)
|
||||
for offloaded_timeline in manifest_json["offloaded_timelines"]:
|
||||
offloaded_timeline["ancestor_retain_lsn"] = None
|
||||
with open(local_path, "w") as manifest_json_file:
|
||||
json.dump(manifest_json, manifest_json_file)
|
||||
|
||||
log.info(f"Uploading {local_path} -> {remote_key}")
|
||||
env.pageserver_remote_storage.client.upload_file(
|
||||
local_path, env.pageserver_remote_storage.bucket_name, remote_key
|
||||
)
|
||||
# The point of our earlier efforts was to provoke these
|
||||
env.pageserver.allowed_errors.extend(
|
||||
[
|
||||
".*initial size calculation failed: PageRead.MissingKey.could not find data for key.*",
|
||||
".*page_service_conn_main.*could not find data for key.*",
|
||||
]
|
||||
)
|
||||
env.pageserver.start()
|
||||
|
||||
# Do an agressive gc and compaction of the parent branch
|
||||
ps_http.timeline_gc(tenant_id=tenant_id, timeline_id=root_timeline_id, gc_horizon=0)
|
||||
ps_http.timeline_checkpoint(
|
||||
tenant_id,
|
||||
root_timeline_id,
|
||||
force_l0_compaction=True,
|
||||
force_repartition=True,
|
||||
wait_until_uploaded=True,
|
||||
compact=True,
|
||||
)
|
||||
|
||||
if offload_child is not None:
|
||||
ps_http.timeline_archival_config(
|
||||
tenant_id,
|
||||
child_timeline_id,
|
||||
state=TimelineArchivalState.UNARCHIVED,
|
||||
)
|
||||
|
||||
# Now, after unarchival, the child timeline should still have its data accessible (or corrupted)
|
||||
if offload_child == "offload-corrupt":
|
||||
with pytest.raises(RuntimeError, match=".*failed to get basebackup.*"):
|
||||
env.endpoints.create_start(
|
||||
"test_archived_branch", tenant_id=tenant_id, basebackup_request_tries=1
|
||||
)
|
||||
else:
|
||||
with env.endpoints.create_start("test_archived_branch", tenant_id=tenant_id) as endpoint:
|
||||
sum = endpoint.safe_psql("SELECT sum(key) from foo where v < 51200")
|
||||
assert sum == pre_branch_sum
|
||||
|
||||
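The corruption step above is a download–mutate–upload round-trip against the remote storage bucket. A generic sketch of that pattern with plain boto3 (hypothetical helper and names; the test itself goes through env.pageserver_remote_storage.client rather than constructing its own client):

import json
import boto3

def mutate_s3_json(bucket: str, key: str, local_path: str, mutate) -> None:
    """Download a JSON object, apply `mutate` to it in place, and upload it back."""
    s3 = boto3.client("s3")
    s3.download_file(bucket, key, local_path)
    with open(local_path) as f:
        doc = json.load(f)
    mutate(doc)  # e.g. null out ancestor_retain_lsn for every offloaded timeline
    with open(local_path, "w") as f:
        json.dump(doc, f)
    s3.upload_file(local_path, bucket, key)
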
@@ -869,8 +869,17 @@ def test_sharded_timeline_detach_ancestor(neon_env_builder: NeonEnvBuilder):
    assert count == 10000


@pytest.mark.parametrize("mode", ["delete_timeline", "delete_tenant"])
@pytest.mark.parametrize("sharded", [False, True])
@pytest.mark.parametrize(
    "mode, sharded",
    [
        ("delete_timeline", False),
        ("delete_timeline", True),
        ("delete_tenant", False),
        # the shared/exclusive lock for tenant is blocking this:
        # timeline detach ancestor takes shared, delete tenant takes exclusive
        # ("delete_tenant", True)
    ],
)
def test_timeline_detach_ancestor_interrupted_by_deletion(
    neon_env_builder: NeonEnvBuilder, mode: str, sharded: bool
):
@@ -885,11 +894,6 @@ def test_timeline_detach_ancestor_interrupted_by_deletion(
    - shutdown winning over complete, see test_timeline_is_deleted_before_timeline_detach_ancestor_completes
    """

    if sharded and mode == "delete_tenant":
        # the shared/exclusive lock for tenant is blocking this:
        # timeline detach ancestor takes shared, delete tenant takes exclusive
        pytest.skip("tenant deletion while timeline ancestor detach is underway cannot happen")

    shard_count = 2 if sharded else 1

    neon_env_builder.num_pageservers = shard_count
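The two hunks above fold a runtime pytest.skip into the parametrize matrix, so the impossible ("delete_tenant", sharded=True) combination is never collected as a test case in the first place. A minimal standalone sketch of the same pattern (illustrative test name and values, not the real suite):

import pytest

@pytest.mark.parametrize(
    "mode, sharded",
    [
        ("delete_timeline", False),
        ("delete_timeline", True),
        ("delete_tenant", False),
        # ("delete_tenant", True),  # excluded: known-impossible combination
    ],
)
def test_example_matrix(mode: str, sharded: bool) -> None:
    # Each remaining (mode, sharded) pair becomes its own collected test case.
    assert (mode, sharded) != ("delete_tenant", True)
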
@@ -54,6 +54,8 @@ from fixtures.utils import (
    PropagatingThread,
    get_dir_size,
    query_scalar,
    run_only_on_default_postgres,
    skip_in_debug_build,
    start_in_background,
    wait_until,
)
@@ -2104,10 +2106,9 @@ def test_pull_timeline_while_evicted(neon_env_builder: NeonEnvBuilder):
# The only way to verify this without manipulating time is to sleep for a while.
# In this test we sleep for 60 seconds, so this test takes at least 1 minute to run.
# This is longer than most other tests, we run it only for v16 to save CI resources.
@run_only_on_default_postgres("run only on release build to save CI resources")
@skip_in_debug_build("run only on release build to save CI resources")
def test_idle_reconnections(neon_env_builder: NeonEnvBuilder):
    if os.environ.get("PYTEST_CURRENT_TEST", "").find("[debug-pg16]") == -1:
        pytest.skip("run only on debug postgres v16 to save CI resources")

    neon_env_builder.num_safekeepers = 3
    env = neon_env_builder.init_start()
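Both decorators above come from fixtures.utils; this diff only shows them being imported and applied, not their implementation. As a rough sketch of how such helpers could be built on pytest.mark.skipif (the environment variables and defaults here are assumptions, not the real fixtures.utils code):

import os
import pytest

def skip_in_debug_build(reason: str):
    # Hypothetical: assumes the build type is exposed via an environment variable.
    return pytest.mark.skipif(os.environ.get("BUILD_TYPE", "release") == "debug", reason=reason)

def run_only_on_default_postgres(reason: str):
    # Hypothetical: assumes the configured Postgres version is exposed the same way.
    return pytest.mark.skipif(os.environ.get("DEFAULT_PG_VERSION", "17") != "17", reason=reason)

Because the skip is attached as a mark, the decision happens at collection time, unlike the old in-body PYTEST_CURRENT_TEST check it replaces.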
@@ -14,6 +14,7 @@ from fixtures.common_types import Lsn, TenantId, TimelineId
from fixtures.log_helper import getLogger
from fixtures.neon_fixtures import Endpoint, NeonEnv, NeonEnvBuilder, Safekeeper
from fixtures.remote_storage import RemoteStorageKind
from fixtures.utils import skip_in_debug_build

if TYPE_CHECKING:
    from typing import Optional
@@ -760,10 +761,8 @@ async def run_wal_lagging(env: NeonEnv, endpoint: Endpoint, test_output_dir: Pat
# The test takes more than default 5 minutes on Postgres 16,
# see https://github.com/neondatabase/neon/issues/5305
@pytest.mark.timeout(600)
@skip_in_debug_build("times out in debug builds")
def test_wal_lagging(neon_env_builder: NeonEnvBuilder, test_output_dir: Path, build_type: str):
    if build_type == "debug":
        pytest.skip("times out in debug builds")

    neon_env_builder.num_safekeepers = 3
    env = neon_env_builder.init_start()
2
vendor/postgres-v14
vendored
Submodule vendor/postgres-v14 updated: 2199b83fb7...de0a000daf
2
vendor/postgres-v15
vendored
Submodule vendor/postgres-v15 updated: 22e580fe9f...fd631a9590
2
vendor/postgres-v16
vendored
Submodule vendor/postgres-v16 updated: e131a9c027...03b43900ed
2
vendor/postgres-v17
vendored
Submodule vendor/postgres-v17 updated: 9ad2f3c5c3...ae4cc30dba
8
vendor/revisions.json
vendored
@@ -1,18 +1,18 @@
{
  "v17": [
    "17.0",
    "9ad2f3c5c37c08069a01c1e3f6b7cf275437e0cb"
    "ae4cc30dba24f3910533e5a48e8103c3f2fff300"
  ],
  "v16": [
    "16.4",
    "e131a9c027b202ce92bd7b9cf2569d48a6f9948e"
    "03b43900edc5d8d6eecec460bfc89aec7174bd84"
  ],
  "v15": [
    "15.8",
    "22e580fe9ffcea7e02592110b1c9bf426d83cada"
    "fd631a959049dfe2b82f67409c8b8b0d3e0016d1"
  ],
  "v14": [
    "14.13",
    "2199b83fb72680001ce0f43bf6187a21dfb8f45d"
    "de0a000dafc2e66ce2e39282d3aa1c704fe0390e"
  ]
}
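vendor/revisions.json pins each vendored Postgres minor release to the submodule commit it is expected to match, and the hunk above moves every pin to the new submodule heads shown in the updates above. A small local cross-check sketch, assuming the vendor/postgres-vNN submodule layout visible in this diff (not a script that ships with the repo):

import json
import subprocess

with open("vendor/revisions.json") as f:
    revisions = json.load(f)

for version, (pg_minor, pinned_sha) in revisions.items():
    # Compare the pinned commit against the submodule's actual HEAD.
    head = subprocess.check_output(
        ["git", "-C", f"vendor/postgres-{version}", "rev-parse", "HEAD"], text=True
    ).strip()
    status = "ok" if head == pinned_sha else f"MISMATCH (submodule at {head})"
    print(f"{version} ({pg_minor}): {status}")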