Merge branch 'main' into yuchen/direct-io-reads-in-ci-by-default

This commit is contained in:
Yuchen Liang
2024-11-11 16:26:01 -05:00
committed by GitHub
37 changed files with 1066 additions and 389 deletions

View File

@@ -20,3 +20,4 @@ config-variables:
- REMOTE_STORAGE_AZURE_REGION
- SLACK_UPCOMING_RELEASE_CHANNEL_ID
- DEV_AWS_OIDC_ROLE_ARN
- BENCHMARK_INGEST_TARGET_PROJECTID

.github/workflows/ingest_benchmark.yml — vendored normal file, 372 lines added
View File

@@ -0,0 +1,372 @@
name: Benchmarking
on:
# uncomment to run on push for debugging your PR
# push:
# branches: [ your branch ]
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '0 9 * * *' # run once a day, timezone is UTC
workflow_dispatch: # adds ability to run this manually
defaults:
run:
shell: bash -euxo pipefail {0}
concurrency:
# Allow only one workflow globally because we need dedicated resources which only exist once
group: ingest-bench-workflow
cancel-in-progress: true
jobs:
ingest:
strategy:
matrix:
target_project: [new_empty_project, large_existing_project]
permissions:
contents: write
statuses: write
id-token: write # aws-actions/configure-aws-credentials
env:
PG_CONFIG: /tmp/neon/pg_install/v16/bin/pg_config
PSQL: /tmp/neon/pg_install/v16/bin/psql
PG_16_LIB_PATH: /tmp/neon/pg_install/v16/lib
PGCOPYDB: /pgcopydb/bin/pgcopydb
PGCOPYDB_LIB_PATH: /pgcopydb/lib
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: neondatabase/build-tools:pinned-bookworm
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init
timeout-minutes: 1440
steps:
- uses: actions/checkout@v4
- name: Configure AWS credentials # necessary to download artefacts
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours is currently the maximum session duration associated with the IAM role
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Create Neon Project
if: ${{ matrix.target_project == 'new_empty_project' }}
id: create-neon-project-ingest-target
uses: ./.github/actions/neon-project-create
with:
region_id: aws-us-east-2
postgres_version: 16
compute_units: '[7, 7]' # we want to test a large compute here to avoid a compute-side bottleneck
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Initialize Neon project and retrieve current backpressure seconds
if: ${{ matrix.target_project == 'new_empty_project' }}
env:
NEW_PROJECT_CONNSTR: ${{ steps.create-neon-project-ingest-target.outputs.dsn }}
NEW_PROJECT_ID: ${{ steps.create-neon-project-ingest-target.outputs.project_id }}
run: |
echo "Initializing Neon project with project_id: ${NEW_PROJECT_ID}"
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
${PSQL} "${NEW_PROJECT_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
BACKPRESSURE_TIME_BEFORE_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;")
echo "BACKPRESSURE_TIME_BEFORE_INGEST=${BACKPRESSURE_TIME_BEFORE_INGEST}" >> $GITHUB_ENV
echo "NEW_PROJECT_CONNSTR=${NEW_PROJECT_CONNSTR}" >> $GITHUB_ENV
- name: Create Neon Branch for large tenant
if: ${{ matrix.target_project == 'large_existing_project' }}
id: create-neon-branch-ingest-target
uses: ./.github/actions/neon-branch-create
with:
project_id: ${{ vars.BENCHMARK_INGEST_TARGET_PROJECTID }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Initialize Neon branch and retrieve current backpressure seconds
if: ${{ matrix.target_project == 'large_existing_project' }}
env:
NEW_PROJECT_CONNSTR: ${{ steps.create-neon-branch-ingest-target.outputs.dsn }}
NEW_BRANCH_ID: ${{ steps.create-neon-branch-ingest-target.outputs.branch_id }}
run: |
echo "Initializing Neon branch with branch_id: ${NEW_BRANCH_ID}"
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
# Extract the part before the database name
base_connstr="${NEW_PROJECT_CONNSTR%/*}"
# Extract the query parameters (if any) after the database name
query_params="${NEW_PROJECT_CONNSTR#*\?}"
# Reconstruct the new connection string
if [ "$query_params" != "$NEW_PROJECT_CONNSTR" ]; then
new_connstr="${base_connstr}/neondb?${query_params}"
else
new_connstr="${base_connstr}/neondb"
fi
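# e.g. (hypothetical connstr): postgres://user@host/neondb?sslmode=require
#   -> base_connstr=postgres://user@host, query_params=sslmode=require,
#      new_connstr=postgres://user@host/neondb?sslmode=require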
${PSQL} "${new_connstr}" -c "drop database ludicrous;"
${PSQL} "${new_connstr}" -c "CREATE DATABASE ludicrous;"
if [ "$query_params" != "$NEW_PROJECT_CONNSTR" ]; then
NEW_PROJECT_CONNSTR="${base_connstr}/ludicrous?${query_params}"
else
NEW_PROJECT_CONNSTR="${base_connstr}/ludicrous"
fi
${PSQL} "${NEW_PROJECT_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
BACKPRESSURE_TIME_BEFORE_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;")
echo "BACKPRESSURE_TIME_BEFORE_INGEST=${BACKPRESSURE_TIME_BEFORE_INGEST}" >> $GITHUB_ENV
echo "NEW_PROJECT_CONNSTR=${NEW_PROJECT_CONNSTR}" >> $GITHUB_ENV
- name: Create pgcopydb filter file
run: |
cat << EOF > /tmp/pgcopydb_filter.txt
[include-only-table]
public.events
public.emails
public.email_transmissions
public.payments
public.editions
public.edition_modules
public.sp_content
public.email_broadcasts
public.user_collections
public.devices
public.user_accounts
public.lessons
public.lesson_users
public.payment_methods
public.orders
public.course_emails
public.modules
public.users
public.module_users
public.courses
public.payment_gateway_keys
public.accounts
public.roles
public.payment_gateways
public.management
public.event_names
EOF
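# the heredoc above writes pgcopydb's INI-style filter file; its
# [include-only-table] section restricts the clone to the listed tables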
- name: Invoke pgcopydb
env:
BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }}
run: |
export LD_LIBRARY_PATH=${PGCOPYDB_LIB_PATH}:${PG_16_LIB_PATH}
export PGCOPYDB_SOURCE_PGURI="${BENCHMARK_INGEST_SOURCE_CONNSTR}"
export PGCOPYDB_TARGET_PGURI="${NEW_PROJECT_CONNSTR}"
export PGOPTIONS="-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7"
${PG_CONFIG} --bindir
${PGCOPYDB} --version
${PGCOPYDB} clone --skip-vacuum --no-owner --no-acl --skip-db-properties --table-jobs 4 \
--index-jobs 4 --restore-jobs 4 --split-tables-larger-than 10GB --skip-extensions \
--use-copy-binary --filters /tmp/pgcopydb_filter.txt 2>&1 | tee /tmp/pgcopydb_${{ matrix.target_project }}.log
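# (--split-tables-larger-than 10GB lets big tables be copied by several COPY
# processes, as the sample log below shows; --skip-extensions keeps the
# extensions created in the init step instead of re-creating them)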
# create dummy pgcopydb log to test parsing
# - name: create dummy log for parser test
# run: |
# cat << EOF > /tmp/pgcopydb_${{ matrix.target_project }}.log
# 2024-11-04 18:00:53.433 500861 INFO main.c:136 Running pgcopydb version 0.17.10.g8361a93 from "/usr/lib/postgresql/17/bin/pgcopydb"
# 2024-11-04 18:00:53.434 500861 INFO cli_common.c:1225 [SOURCE] Copying database from "postgres://neondb_owner@ep-bitter-shape-w2c1ir0a.us-east-2.aws.neon.build/neondb?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60"
# 2024-11-04 18:00:53.434 500861 INFO cli_common.c:1226 [TARGET] Copying database into "postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60"
# 2024-11-04 18:00:53.442 500861 INFO copydb.c:105 Using work dir "/tmp/pgcopydb"
# 2024-11-04 18:00:53.541 500861 INFO snapshot.c:107 Exported snapshot "00000008-00000033-1" from the source database
# 2024-11-04 18:00:53.556 500865 INFO cli_clone_follow.c:543 STEP 1: fetch source database tables, indexes, and sequences
# 2024-11-04 18:00:54.570 500865 INFO copydb_schema.c:716 Splitting source candidate tables larger than 10 GB
# 2024-11-04 18:00:54.570 500865 INFO copydb_schema.c:829 Table public.events is 96 GB large which is larger than --split-tables-larger-than 10 GB, and does not have a unique column of type integer: splitting by CTID
# 2024-11-04 18:01:05.538 500865 INFO copydb_schema.c:905 Table public.events is 96 GB large, 10 COPY processes will be used, partitioning on ctid.
# 2024-11-04 18:01:05.564 500865 INFO copydb_schema.c:905 Table public.email_transmissions is 27 GB large, 4 COPY processes will be used, partitioning on id.
# 2024-11-04 18:01:05.584 500865 INFO copydb_schema.c:905 Table public.lessons is 25 GB large, 4 COPY processes will be used, partitioning on id.
# 2024-11-04 18:01:05.605 500865 INFO copydb_schema.c:905 Table public.lesson_users is 16 GB large, 3 COPY processes will be used, partitioning on id.
# 2024-11-04 18:01:05.605 500865 INFO copydb_schema.c:761 Fetched information for 26 tables (including 4 tables split in 21 partitions total), with an estimated total of 907 million tuples and 175 GB on-disk
# 2024-11-04 18:01:05.687 500865 INFO copydb_schema.c:968 Fetched information for 57 indexes (supporting 25 constraints)
# 2024-11-04 18:01:05.753 500865 INFO sequences.c:78 Fetching information for 24 sequences
# 2024-11-04 18:01:05.903 500865 INFO copydb_schema.c:1122 Fetched information for 4 extensions
# 2024-11-04 18:01:06.178 500865 INFO copydb_schema.c:1538 Found 0 indexes (supporting 0 constraints) in the target database
# 2024-11-04 18:01:06.184 500865 INFO cli_clone_follow.c:584 STEP 2: dump the source database schema (pre/post data)
# 2024-11-04 18:01:06.186 500865 INFO pgcmd.c:468 /usr/lib/postgresql/16/bin/pg_dump -Fc --snapshot 00000008-00000033-1 --section=pre-data --section=post-data --file /tmp/pgcopydb/schema/schema.dump 'postgres://neondb_owner@ep-bitter-shape-w2c1ir0a.us-east-2.aws.neon.build/neondb?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60'
# 2024-11-04 18:01:06.952 500865 INFO cli_clone_follow.c:592 STEP 3: restore the pre-data section to the target database
# 2024-11-04 18:01:07.004 500865 INFO pgcmd.c:1001 /usr/lib/postgresql/16/bin/pg_restore --dbname 'postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' --section pre-data --jobs 4 --no-owner --no-acl --use-list /tmp/pgcopydb/schema/pre-filtered.list /tmp/pgcopydb/schema/schema.dump
# 2024-11-04 18:01:07.438 500874 INFO table-data.c:656 STEP 4: starting 4 table-data COPY processes
# 2024-11-04 18:01:07.451 500877 INFO vacuum.c:139 STEP 8: skipping VACUUM jobs per --skip-vacuum
# 2024-11-04 18:01:07.457 500875 INFO indexes.c:182 STEP 6: starting 4 CREATE INDEX processes
# 2024-11-04 18:01:07.457 500875 INFO indexes.c:183 STEP 7: constraints are built by the CREATE INDEX processes
# 2024-11-04 18:01:07.507 500865 INFO blobs.c:74 Skipping large objects: none found.
# 2024-11-04 18:01:07.509 500865 INFO sequences.c:194 STEP 9: reset sequences values
# 2024-11-04 18:01:07.510 500886 INFO sequences.c:290 Set sequences values on the target database
# 2024-11-04 20:49:00.587 500865 INFO cli_clone_follow.c:608 STEP 10: restore the post-data section to the target database
# 2024-11-04 20:49:00.600 500865 INFO pgcmd.c:1001 /usr/lib/postgresql/16/bin/pg_restore --dbname 'postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' --section post-data --jobs 4 --no-owner --no-acl --use-list /tmp/pgcopydb/schema/post-filtered.list /tmp/pgcopydb/schema/schema.dump
# 2024-11-05 10:50:58.508 500865 INFO cli_clone_follow.c:639 All steps are now done, 16h49m elapsed
# 2024-11-05 10:50:58.508 500865 INFO summary.c:3155 Printing summary for 26 tables and 57 indexes
# OID | Schema | Name | Parts | copy duration | transmitted bytes | indexes | create index duration
# ------+--------+----------------------+-------+---------------+-------------------+---------+----------------------
# 24654 | public | events | 10 | 1d11h | 878 GB | 1 | 1h41m
# 24623 | public | email_transmissions | 4 | 4h46m | 99 GB | 3 | 2h04m
# 24665 | public | lessons | 4 | 4h42m | 161 GB | 4 | 1m11s
# 24661 | public | lesson_users | 3 | 2h46m | 49 GB | 3 | 39m35s
# 24631 | public | emails | 1 | 34m07s | 10 GB | 2 | 17s
# 24739 | public | payments | 1 | 5m47s | 1848 MB | 4 | 4m40s
# 24681 | public | module_users | 1 | 4m57s | 1610 MB | 3 | 1m50s
# 24694 | public | orders | 1 | 2m50s | 835 MB | 3 | 1m05s
# 24597 | public | devices | 1 | 1m45s | 498 MB | 2 | 40s
# 24723 | public | payment_methods | 1 | 1m24s | 548 MB | 2 | 31s
# 24765 | public | user_collections | 1 | 2m17s | 1005 MB | 2 | 968ms
# 24774 | public | users | 1 | 52s | 291 MB | 4 | 27s
# 24760 | public | user_accounts | 1 | 16s | 172 MB | 3 | 16s
# 24606 | public | edition_modules | 1 | 8s983 | 46 MB | 3 | 4s749
# 24583 | public | course_emails | 1 | 8s526 | 26 MB | 2 | 996ms
# 24685 | public | modules | 1 | 1s592 | 21 MB | 3 | 1s696
# 24610 | public | editions | 1 | 2s199 | 7483 kB | 2 | 1s032
# 24755 | public | sp_content | 1 | 1s555 | 4177 kB | 0 | 0ms
# 24619 | public | email_broadcasts | 1 | 744ms | 2645 kB | 2 | 677ms
# 24590 | public | courses | 1 | 387ms | 1540 kB | 2 | 367ms
# 24704 | public | payment_gateway_keys | 1 | 1s972 | 164 kB | 2 | 27ms
# 24576 | public | accounts | 1 | 58ms | 24 kB | 1 | 14ms
# 24647 | public | event_names | 1 | 32ms | 397 B | 1 | 8ms
# 24716 | public | payment_gateways | 1 | 1s675 | 117 B | 1 | 11ms
# 24748 | public | roles | 1 | 71ms | 173 B | 1 | 8ms
# 24676 | public | management | 1 | 33ms | 40 B | 1 | 19ms
# Step Connection Duration Transfer Concurrency
# -------------------------------------------------- ---------- ---------- ---------- ------------
# Catalog Queries (table ordering, filtering, etc) source 12s 1
# Dump Schema source 765ms 1
# Prepare Schema target 466ms 1
# COPY, INDEX, CONSTRAINTS, VACUUM (wall clock) both 2h47m 12
# COPY (cumulative) both 7h46m 1225 GB 4
# CREATE INDEX (cumulative) target 4h36m 4
# CONSTRAINTS (cumulative) target 8s493 4
# VACUUM (cumulative) target 0ms 4
# Reset Sequences both 60ms 1
# Large Objects (cumulative) (null) 0ms 0
# Finalize Schema both 14h01m 4
# -------------------------------------------------- ---------- ---------- ---------- ------------
# Total Wall Clock Duration both 16h49m 20
# EOF
- name: Show table sizes and retrieve current backpressure seconds
run: |
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
${PSQL} "${NEW_PROJECT_CONNSTR}" -c "\dt+"
BACKPRESSURE_TIME_AFTER_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;")
echo "BACKPRESSURE_TIME_AFTER_INGEST=${BACKPRESSURE_TIME_AFTER_INGEST}" >> $GITHUB_ENV
- name: Parse pgcopydb log and report performance metrics
env:
PERF_TEST_RESULT_CONNSTR: ${{ secrets.PERF_TEST_RESULT_CONNSTR }}
run: |
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
# Define the log file path
LOG_FILE="/tmp/pgcopydb_${{ matrix.target_project }}.log"
# Get the current git commit hash
git config --global --add safe.directory /__w/neon/neon
COMMIT_HASH=$(git rev-parse --short HEAD)
# Define the platform and test suite
PLATFORM="pg16-${{ matrix.target_project }}-us-east-2-staging"
SUIT="pgcopydb_ingest_bench"
# Function to convert time (e.g., "2h47m", "4h36m", "118ms", "8s493") to seconds
convert_to_seconds() {
local duration=$1
local total_seconds=0
# Check for hours (h)
if [[ "$duration" =~ ([0-9]+)h ]]; then
total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} * 3600))
fi
# Check for seconds (s)
if [[ "$duration" =~ ([0-9]+)s ]]; then
total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0}))
fi
# Check for milliseconds (ms) (if applicable)
if [[ "$duration" =~ ([0-9]+)ms ]]; then
total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} / 1000))
duration=${duration/${BASH_REMATCH[0]}/} # need to remove it to avoid double counting with m
fi
# Check for minutes (m) - must be checked after ms because m is contained in ms
if [[ "$duration" =~ ([0-9]+)m ]]; then
total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} * 60))
fi
echo $total_seconds
}
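# worked example (illustrative): convert_to_seconds "2h47m" -> 2*3600 + 47*60 = 10020;
# sub-second values like "118ms" floor to 0 because of the integer division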
# Calculate the backpressure difference in seconds
BACKPRESSURE_TIME_DIFF=$(awk "BEGIN {print $BACKPRESSURE_TIME_AFTER_INGEST - $BACKPRESSURE_TIME_BEFORE_INGEST}")
# Insert the backpressure time difference into the performance database
if [ -n "$BACKPRESSURE_TIME_DIFF" ]; then
PSQL_CMD="${PSQL} \"${PERF_TEST_RESULT_CONNSTR}\" -c \"
INSERT INTO public.perf_test_results (suit, revision, platform, metric_name, metric_value, metric_unit, metric_report_type, recorded_at_timestamp)
VALUES ('${SUIT}', '${COMMIT_HASH}', '${PLATFORM}', 'backpressure_time', ${BACKPRESSURE_TIME_DIFF}, 'seconds', 'lower_is_better', now());
\""
echo "Inserting backpressure time difference: ${BACKPRESSURE_TIME_DIFF} seconds"
eval $PSQL_CMD
fi
# Extract and process log lines
while IFS= read -r line; do
METRIC_NAME=""
# Match each desired line and extract the relevant information
if [[ "$line" =~ COPY,\ INDEX,\ CONSTRAINTS,\ VACUUM.* ]]; then
METRIC_NAME="COPY, INDEX, CONSTRAINTS, VACUUM (wall clock)"
elif [[ "$line" =~ COPY\ \(cumulative\).* ]]; then
METRIC_NAME="COPY (cumulative)"
elif [[ "$line" =~ CREATE\ INDEX\ \(cumulative\).* ]]; then
METRIC_NAME="CREATE INDEX (cumulative)"
elif [[ "$line" =~ CONSTRAINTS\ \(cumulative\).* ]]; then
METRIC_NAME="CONSTRAINTS (cumulative)"
elif [[ "$line" =~ Finalize\ Schema.* ]]; then
METRIC_NAME="Finalize Schema"
elif [[ "$line" =~ Total\ Wall\ Clock\ Duration.* ]]; then
METRIC_NAME="Total Wall Clock Duration"
fi
# If a metric was matched, insert it into the performance database
if [ -n "$METRIC_NAME" ]; then
DURATION=$(echo "$line" | grep -oP '\d+h\d+m|\d+s|\d+ms|\d{1,2}h\d{1,2}m|\d+\.\d+s' | head -n 1)
METRIC_VALUE=$(convert_to_seconds "$DURATION")
PSQL_CMD="${PSQL} \"${PERF_TEST_RESULT_CONNSTR}\" -c \"
INSERT INTO public.perf_test_results (suit, revision, platform, metric_name, metric_value, metric_unit, metric_report_type, recorded_at_timestamp)
VALUES ('${SUIT}', '${COMMIT_HASH}', '${PLATFORM}', '${METRIC_NAME}', ${METRIC_VALUE}, 'seconds', 'lower_is_better', now());
\""
echo "Inserting ${METRIC_NAME} with value ${METRIC_VALUE} seconds"
eval $PSQL_CMD
fi
done < "$LOG_FILE"
- name: Delete Neon Project
if: ${{ always() && matrix.target_project == 'new_empty_project' }}
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project-ingest-target.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Delete Neon Branch for large tenant
if: ${{ always() && matrix.target_project == 'large_existing_project' }}
uses: ./.github/actions/neon-branch-delete
with:
project_id: ${{ vars.BENCHMARK_INGEST_TARGET_PROJECTID }}
branch_id: ${{ steps.create-neon-branch-ingest-target.outputs.branch_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}

View File

@@ -0,0 +1,29 @@
name: Report Workflow Stats Batch
on:
schedule:
- cron: '*/15 * * * *'
- cron: '25 0 * * *'
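# the 15-minute schedule keeps stats fresh; the daily 00:25 run (matched by the
# `if:` condition below) re-exports a 24-hour window to backfill any gaps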
jobs:
gh-workflow-stats-batch:
name: GitHub Workflow Stats Batch
runs-on: ubuntu-22.04
permissions:
actions: read
steps:
- name: Export Workflow Run for the past 2 hours
uses: neondatabase/gh-workflow-stats-action@v0.2.1
with:
db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
db_table: "gh_workflow_stats_batch_neon"
gh_token: ${{ secrets.GITHUB_TOKEN }}
duration: '2h'
- name: Export Workflow Run for the past 24 hours
if: github.event.schedule == '25 0 * * *'
uses: neondatabase/gh-workflow-stats-action@v0.2.1
with:
db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
db_table: "gh_workflow_stats_batch_neon"
gh_token: ${{ secrets.GITHUB_TOKEN }}
duration: '24h'

View File

@@ -1475,6 +1475,8 @@ RUN mkdir -p /etc/local_proxy && chown postgres:postgres /etc/local_proxy
COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter
COPY --from=sql-exporter /bin/sql_exporter /bin/sql_exporter
COPY --chown=postgres compute/etc/postgres_exporter.yml /etc/postgres_exporter.yml
COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/sql_exporter.yml /etc/sql_exporter.yml
COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/neon_collector.yml /etc/neon_collector.yml
COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/sql_exporter_autoscaling.yml /etc/sql_exporter_autoscaling.yml

View File

@@ -26,7 +26,7 @@ commands:
- name: postgres-exporter
user: nobody
sysvInitAction: respawn
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter'
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter --config.file=/etc/postgres_exporter.yml'
- name: sql-exporter
user: nobody
sysvInitAction: respawn

View File

@@ -26,7 +26,7 @@ commands:
- name: postgres-exporter
user: nobody
sysvInitAction: respawn
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter'
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter --config.file=/etc/postgres_exporter.yml'
- name: sql-exporter
user: nobody
sysvInitAction: respawn

View File

@@ -74,10 +74,17 @@ pub fn write_postgres_conf(
}
// Locales
writeln!(file, "lc_messages='C.UTF-8'")?;
writeln!(file, "lc_monetary='C.UTF-8'")?;
writeln!(file, "lc_time='C.UTF-8'")?;
writeln!(file, "lc_numeric='C.UTF-8'")?;
if cfg!(target_os = "macos") {
writeln!(file, "lc_messages='C'")?;
writeln!(file, "lc_monetary='C'")?;
writeln!(file, "lc_time='C'")?;
writeln!(file, "lc_numeric='C'")?;
} else {
writeln!(file, "lc_messages='C.UTF-8'")?;
writeln!(file, "lc_monetary='C.UTF-8'")?;
writeln!(file, "lc_time='C.UTF-8'")?;
writeln!(file, "lc_numeric='C.UTF-8'")?;
}
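// presumably because macOS ships no C.UTF-8 locale, plain "C" is the safe choice there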
match spec.mode {
ComputeMode::Primary => {}

View File

@@ -277,7 +277,11 @@ pub mod defaults {
pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s";
pub const DEFAULT_SUPERUSER: &str = "cloud_admin";
pub const DEFAULT_LOCALE: &str = "C.UTF-8";
pub const DEFAULT_LOCALE: &str = if cfg!(target_os = "macos") {
"C"
} else {
"C.UTF-8"
};
pub const DEFAULT_PAGE_CACHE_SIZE: usize = 8192;
pub const DEFAULT_MAX_FILE_DESCRIPTORS: usize = 100;

View File

@@ -35,6 +35,15 @@ pub fn overlaps_with<T: Ord>(a: &Range<T>, b: &Range<T>) -> bool {
!(a.end <= b.start || b.end <= a.start)
}
/// Whether `a` fully contains `b`; for example:
/// ```plain
/// | a |
/// | b |
/// ```
pub fn fully_contains<T: Ord>(a: &Range<T>, b: &Range<T>) -> bool {
a.start <= b.start && a.end >= b.end
}
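// Behavior sketch (illustrative values):
//   fully_contains(&(0..10), &(2..4))  == true
//   fully_contains(&(0..10), &(8..12)) == false // b extends past a.end
//   overlaps_with(&(0..10), &(8..12))  == true  // yet the two ranges do overlap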
pub fn union_to_keyspace<K: Ord>(a: &mut CompactionKeySpace<K>, b: CompactionKeySpace<K>) {
let x = std::mem::take(a);
let mut all_ranges_iter = [x.into_iter(), b.into_iter()]

View File

@@ -4786,12 +4786,6 @@ async fn run_initdb(
.args(["--username", &conf.superuser])
.args(["--encoding", "utf8"])
.args(["--locale", &conf.locale])
.args(["--lc-collate", &conf.locale])
.args(["--lc-ctype", &conf.locale])
.args(["--lc-messages", &conf.locale])
.args(["--lc-monetary", &conf.locale])
.args(["--lc-numeric", &conf.locale])
.args(["--lc-time", &conf.locale])
.arg("--no-instructions")
.arg("--no-sync")
.env_clear()
@@ -9229,6 +9223,23 @@ mod tests {
Ok(())
}
fn sort_layer_key(k1: &PersistentLayerKey, k2: &PersistentLayerKey) -> std::cmp::Ordering {
(
k1.is_delta,
k1.key_range.start,
k1.key_range.end,
k1.lsn_range.start,
k1.lsn_range.end,
)
.cmp(&(
k2.is_delta,
k2.key_range.start,
k2.key_range.end,
k2.lsn_range.start,
k2.lsn_range.end,
))
}
async fn inspect_and_sort(
tline: &Arc<Timeline>,
filter: Option<std::ops::Range<Key>>,
@@ -9237,25 +9248,30 @@ mod tests {
if let Some(filter) = filter {
all_layers.retain(|layer| overlaps_with(&layer.key_range, &filter));
}
all_layers.sort_by(|k1, k2| {
(
k1.is_delta,
k1.key_range.start,
k1.key_range.end,
k1.lsn_range.start,
k1.lsn_range.end,
)
.cmp(&(
k2.is_delta,
k2.key_range.start,
k2.key_range.end,
k2.lsn_range.start,
k2.lsn_range.end,
))
});
all_layers.sort_by(sort_layer_key);
all_layers
}
#[cfg(feature = "testing")]
fn check_layer_map_key_eq(
mut left: Vec<PersistentLayerKey>,
mut right: Vec<PersistentLayerKey>,
) {
left.sort_by(sort_layer_key);
right.sort_by(sort_layer_key);
if left != right {
eprintln!("---LEFT---");
for left in left.iter() {
eprintln!("{}", left);
}
eprintln!("---RIGHT---");
for right in right.iter() {
eprintln!("{}", right);
}
assert_eq!(left, right);
}
}
#[cfg(feature = "testing")]
#[tokio::test]
async fn test_simple_partial_bottom_most_compaction() -> anyhow::Result<()> {
@@ -9348,127 +9364,206 @@ mod tests {
let cancel = CancellationToken::new();
// Do a partial compaction on key range 0..4, we should generate an image layer; no other layers
// can be removed because they might be used for other key ranges.
// Do a partial compaction on key range 0..2
tline
.partial_compact_with_gc(Some(get_key(0)..get_key(4)), &cancel, EnumSet::new(), &ctx)
.partial_compact_with_gc(get_key(0)..get_key(2), &cancel, EnumSet::new(), &ctx)
.await
.unwrap();
let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;
assert_eq!(
check_layer_map_key_eq(
all_layers,
vec![
// newly-generated image layer for the partial compaction range 0-2
PersistentLayerKey {
key_range: get_key(0)..get_key(4),
key_range: get_key(0)..get_key(2),
lsn_range: Lsn(0x20)..Lsn(0x21),
is_delta: false
is_delta: false,
},
PersistentLayerKey {
key_range: get_key(0)..get_key(10),
lsn_range: Lsn(0x10)..Lsn(0x11),
is_delta: false
is_delta: false,
},
// delta1 is split and the second part is rewritten
PersistentLayerKey {
key_range: get_key(1)..get_key(4),
key_range: get_key(2)..get_key(4),
lsn_range: Lsn(0x20)..Lsn(0x48),
is_delta: true
is_delta: true,
},
PersistentLayerKey {
key_range: get_key(5)..get_key(7),
lsn_range: Lsn(0x20)..Lsn(0x48),
is_delta: true
is_delta: true,
},
PersistentLayerKey {
key_range: get_key(8)..get_key(10),
lsn_range: Lsn(0x48)..Lsn(0x50),
is_delta: true
}
]
is_delta: true,
},
],
);
// Do a partial compaction on key range 4..10
// Do a partial compaction on key range 2..4
tline
.partial_compact_with_gc(Some(get_key(4)..get_key(10)), &cancel, EnumSet::new(), &ctx)
.partial_compact_with_gc(get_key(2)..get_key(4), &cancel, EnumSet::new(), &ctx)
.await
.unwrap();
let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;
assert_eq!(
check_layer_map_key_eq(
all_layers,
vec![
PersistentLayerKey {
key_range: get_key(0)..get_key(4),
key_range: get_key(0)..get_key(2),
lsn_range: Lsn(0x20)..Lsn(0x21),
is_delta: false
is_delta: false,
},
PersistentLayerKey {
// if (in the future) GC kicks in, this layer will be removed
key_range: get_key(0)..get_key(10),
lsn_range: Lsn(0x10)..Lsn(0x11),
is_delta: false
is_delta: false,
},
// image layer generated for the compaction range 2-4
PersistentLayerKey {
key_range: get_key(4)..get_key(10),
key_range: get_key(2)..get_key(4),
lsn_range: Lsn(0x20)..Lsn(0x21),
is_delta: false
is_delta: false,
},
// we have key2/key3 above the retain_lsn, so we still need this delta layer
PersistentLayerKey {
key_range: get_key(1)..get_key(4),
key_range: get_key(2)..get_key(4),
lsn_range: Lsn(0x20)..Lsn(0x48),
is_delta: true
is_delta: true,
},
PersistentLayerKey {
key_range: get_key(5)..get_key(7),
lsn_range: Lsn(0x20)..Lsn(0x48),
is_delta: true
is_delta: true,
},
PersistentLayerKey {
key_range: get_key(8)..get_key(10),
lsn_range: Lsn(0x48)..Lsn(0x50),
is_delta: true
}
]
is_delta: true,
},
],
);
// Do a partial compaction on key range 4..9
tline
.partial_compact_with_gc(get_key(4)..get_key(9), &cancel, EnumSet::new(), &ctx)
.await
.unwrap();
let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;
check_layer_map_key_eq(
all_layers,
vec![
PersistentLayerKey {
key_range: get_key(0)..get_key(2),
lsn_range: Lsn(0x20)..Lsn(0x21),
is_delta: false,
},
PersistentLayerKey {
key_range: get_key(0)..get_key(10),
lsn_range: Lsn(0x10)..Lsn(0x11),
is_delta: false,
},
PersistentLayerKey {
key_range: get_key(2)..get_key(4),
lsn_range: Lsn(0x20)..Lsn(0x21),
is_delta: false,
},
PersistentLayerKey {
key_range: get_key(2)..get_key(4),
lsn_range: Lsn(0x20)..Lsn(0x48),
is_delta: true,
},
// image layer generated for this compaction range
PersistentLayerKey {
key_range: get_key(4)..get_key(9),
lsn_range: Lsn(0x20)..Lsn(0x21),
is_delta: false,
},
PersistentLayerKey {
key_range: get_key(8)..get_key(10),
lsn_range: Lsn(0x48)..Lsn(0x50),
is_delta: true,
},
],
);
// Do a partial compaction on key range 9..10
tline
.partial_compact_with_gc(get_key(9)..get_key(10), &cancel, EnumSet::new(), &ctx)
.await
.unwrap();
let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;
check_layer_map_key_eq(
all_layers,
vec![
PersistentLayerKey {
key_range: get_key(0)..get_key(2),
lsn_range: Lsn(0x20)..Lsn(0x21),
is_delta: false,
},
PersistentLayerKey {
key_range: get_key(0)..get_key(10),
lsn_range: Lsn(0x10)..Lsn(0x11),
is_delta: false,
},
PersistentLayerKey {
key_range: get_key(2)..get_key(4),
lsn_range: Lsn(0x20)..Lsn(0x21),
is_delta: false,
},
PersistentLayerKey {
key_range: get_key(2)..get_key(4),
lsn_range: Lsn(0x20)..Lsn(0x48),
is_delta: true,
},
PersistentLayerKey {
key_range: get_key(4)..get_key(9),
lsn_range: Lsn(0x20)..Lsn(0x21),
is_delta: false,
},
// image layer generated for the compaction range
PersistentLayerKey {
key_range: get_key(9)..get_key(10),
lsn_range: Lsn(0x20)..Lsn(0x21),
is_delta: false,
},
PersistentLayerKey {
key_range: get_key(8)..get_key(10),
lsn_range: Lsn(0x48)..Lsn(0x50),
is_delta: true,
},
],
);
// Do a partial compaction on key range 0..10, all image layers below LSN 20 can be replaced with new ones.
tline
.partial_compact_with_gc(Some(get_key(0)..get_key(10)), &cancel, EnumSet::new(), &ctx)
.partial_compact_with_gc(get_key(0)..get_key(10), &cancel, EnumSet::new(), &ctx)
.await
.unwrap();
let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;
assert_eq!(
check_layer_map_key_eq(
all_layers,
vec![
PersistentLayerKey {
key_range: get_key(0)..get_key(4),
lsn_range: Lsn(0x20)..Lsn(0x21),
is_delta: false
},
// aha, we removed all unnecessary image/delta layers and got a very clean layer map!
PersistentLayerKey {
key_range: get_key(0)..get_key(10),
lsn_range: Lsn(0x20)..Lsn(0x21),
is_delta: false
is_delta: false,
},
PersistentLayerKey {
key_range: get_key(4)..get_key(10),
lsn_range: Lsn(0x20)..Lsn(0x21),
is_delta: false
},
PersistentLayerKey {
key_range: get_key(1)..get_key(4),
key_range: get_key(2)..get_key(4),
lsn_range: Lsn(0x20)..Lsn(0x48),
is_delta: true
},
PersistentLayerKey {
key_range: get_key(5)..get_key(7),
lsn_range: Lsn(0x20)..Lsn(0x48),
is_delta: true
is_delta: true,
},
PersistentLayerKey {
key_range: get_key(8)..get_key(10),
lsn_range: Lsn(0x48)..Lsn(0x50),
is_delta: true
}
]
is_delta: true,
},
],
);
Ok(())

View File

@@ -653,6 +653,10 @@ impl DeltaLayerWriter {
})
}
pub fn is_empty(&self) -> bool {
self.inner.as_ref().unwrap().num_keys == 0
}
///
/// Append a key-value pair to the file.
///

View File

@@ -1,4 +1,4 @@
use std::ops::Range;
use std::{ops::Range, sync::Arc};
use anyhow::bail;
use pageserver_api::{
@@ -9,7 +9,10 @@ use utils::lsn::Lsn;
use pageserver_api::value::Value;
use super::merge_iterator::MergeIterator;
use super::{
merge_iterator::{MergeIterator, MergeIteratorItem},
PersistentLayerKey,
};
/// A filter iterator over merge iterators (which can easily be extended to other types of iterators).
///
@@ -48,10 +51,10 @@ impl<'a> FilterIterator<'a> {
})
}
pub async fn next(&mut self) -> anyhow::Result<Option<(Key, Lsn, Value)>> {
while let Some(item) = self.inner.next().await? {
async fn next_inner<R: MergeIteratorItem>(&mut self) -> anyhow::Result<Option<R>> {
while let Some(item) = self.inner.next_inner::<R>().await? {
while self.current_filter_idx < self.retain_key_filters.len()
&& item.0 >= self.retain_key_filters[self.current_filter_idx].end
&& item.key_lsn_value().0 >= self.retain_key_filters[self.current_filter_idx].end
{
// [filter region] [filter region] [filter region]
// ^ item
@@ -68,7 +71,7 @@ impl<'a> FilterIterator<'a> {
// ^ current filter (nothing)
return Ok(None);
}
if self.retain_key_filters[self.current_filter_idx].contains(&item.0) {
if self.retain_key_filters[self.current_filter_idx].contains(&item.key_lsn_value().0) {
// [filter region] [filter region] [filter region]
// ^ item
// ^ current filter
@@ -81,6 +84,16 @@ impl<'a> FilterIterator<'a> {
}
Ok(None)
}
pub async fn next(&mut self) -> anyhow::Result<Option<(Key, Lsn, Value)>> {
self.next_inner().await
}
pub async fn next_with_trace(
&mut self,
) -> anyhow::Result<Option<((Key, Lsn, Value), Arc<PersistentLayerKey>)>> {
self.next_inner().await
}
}
#[cfg(test)]

View File

@@ -1,6 +1,7 @@
use std::{
cmp::Ordering,
collections::{binary_heap, BinaryHeap},
sync::Arc,
};
use anyhow::bail;
@@ -13,10 +14,11 @@ use pageserver_api::value::Value;
use super::{
delta_layer::{DeltaLayerInner, DeltaLayerIterator},
image_layer::{ImageLayerInner, ImageLayerIterator},
PersistentLayerDesc, PersistentLayerKey,
};
#[derive(Clone, Copy)]
enum LayerRef<'a> {
pub(crate) enum LayerRef<'a> {
Image(&'a ImageLayerInner),
Delta(&'a DeltaLayerInner),
}
@@ -62,18 +64,20 @@ impl LayerIterRef<'_> {
/// 1. Unified iterator for image and delta layers.
/// 2. `Ord` for use in [`MergeIterator::heap`] (for the k-merge).
/// 3. Lazy creation of the real delta/image iterator.
enum IteratorWrapper<'a> {
pub(crate) enum IteratorWrapper<'a> {
NotLoaded {
ctx: &'a RequestContext,
first_key_lower_bound: (Key, Lsn),
layer: LayerRef<'a>,
source_desc: Arc<PersistentLayerKey>,
},
Loaded {
iter: PeekableLayerIterRef<'a>,
source_desc: Arc<PersistentLayerKey>,
},
}
struct PeekableLayerIterRef<'a> {
pub(crate) struct PeekableLayerIterRef<'a> {
iter: LayerIterRef<'a>,
peeked: Option<(Key, Lsn, Value)>, // None == end
}
@@ -151,6 +155,12 @@ impl<'a> IteratorWrapper<'a> {
layer: LayerRef::Image(image_layer),
first_key_lower_bound: (image_layer.key_range().start, image_layer.lsn()),
ctx,
source_desc: PersistentLayerKey {
key_range: image_layer.key_range().clone(),
lsn_range: PersistentLayerDesc::image_layer_lsn_range(image_layer.lsn()),
is_delta: false,
}
.into(),
}
}
@@ -162,12 +172,18 @@ impl<'a> IteratorWrapper<'a> {
layer: LayerRef::Delta(delta_layer),
first_key_lower_bound: (delta_layer.key_range().start, delta_layer.lsn_range().start),
ctx,
source_desc: PersistentLayerKey {
key_range: delta_layer.key_range().clone(),
lsn_range: delta_layer.lsn_range().clone(),
is_delta: true,
}
.into(),
}
}
fn peek_next_key_lsn_value(&self) -> Option<(&Key, Lsn, Option<&Value>)> {
match self {
Self::Loaded { iter } => iter
Self::Loaded { iter, .. } => iter
.peek()
.as_ref()
.map(|(key, lsn, val)| (key, *lsn, Some(val))),
@@ -191,6 +207,7 @@ impl<'a> IteratorWrapper<'a> {
ctx,
first_key_lower_bound,
layer,
source_desc,
} = self
else {
unreachable!()
@@ -206,7 +223,10 @@ impl<'a> IteratorWrapper<'a> {
);
}
}
*self = Self::Loaded { iter };
*self = Self::Loaded {
iter,
source_desc: source_desc.clone(),
};
Ok(())
}
@@ -220,11 +240,19 @@ impl<'a> IteratorWrapper<'a> {
/// The public interfaces to use are [`crate::tenant::storage_layer::delta_layer::DeltaLayerIterator`] and
/// [`crate::tenant::storage_layer::image_layer::ImageLayerIterator`].
async fn next(&mut self) -> anyhow::Result<Option<(Key, Lsn, Value)>> {
let Self::Loaded { iter } = self else {
let Self::Loaded { iter, .. } = self else {
panic!("must load the iterator before using")
};
iter.next().await
}
/// Get the persistent layer key corresponding to this iterator
fn trace_source(&self) -> Arc<PersistentLayerKey> {
match self {
Self::Loaded { source_desc, .. } => source_desc.clone(),
Self::NotLoaded { source_desc, .. } => source_desc.clone(),
}
}
}
/// A merge iterator over delta/image layer iterators.
@@ -242,6 +270,32 @@ pub struct MergeIterator<'a> {
heap: BinaryHeap<IteratorWrapper<'a>>,
}
pub(crate) trait MergeIteratorItem {
fn new(item: (Key, Lsn, Value), iterator: &IteratorWrapper<'_>) -> Self;
fn key_lsn_value(&self) -> &(Key, Lsn, Value);
}
impl MergeIteratorItem for (Key, Lsn, Value) {
fn new(item: (Key, Lsn, Value), _: &IteratorWrapper<'_>) -> Self {
item
}
fn key_lsn_value(&self) -> &(Key, Lsn, Value) {
self
}
}
impl MergeIteratorItem for ((Key, Lsn, Value), Arc<PersistentLayerKey>) {
fn new(item: (Key, Lsn, Value), iter: &IteratorWrapper<'_>) -> Self {
(item, iter.trace_source().clone())
}
fn key_lsn_value(&self) -> &(Key, Lsn, Value) {
&self.0
}
}
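// Illustrative: the two MergeIteratorItem impls above let next_inner serve both public
// entry points, selected purely by the requested return type:
//   let kv = iter.next().await?;                // Option<(Key, Lsn, Value)>
//   let traced = iter.next_with_trace().await?; // Option<((Key, Lsn, Value), Arc<PersistentLayerKey>)>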
impl<'a> MergeIterator<'a> {
pub fn create(
deltas: &[&'a DeltaLayerInner],
@@ -260,7 +314,7 @@ impl<'a> MergeIterator<'a> {
}
}
pub async fn next(&mut self) -> anyhow::Result<Option<(Key, Lsn, Value)>> {
pub(crate) async fn next_inner<R: MergeIteratorItem>(&mut self) -> anyhow::Result<Option<R>> {
while let Some(mut iter) = self.heap.peek_mut() {
if !iter.is_loaded() {
// Once we load the iterator, we can know the real first key-value pair in the iterator.
@@ -275,10 +329,22 @@ impl<'a> MergeIterator<'a> {
binary_heap::PeekMut::pop(iter);
continue;
};
return Ok(Some(item));
return Ok(Some(R::new(item, &iter)));
}
Ok(None)
}
/// Get the next key-value pair from the iterator.
pub async fn next(&mut self) -> anyhow::Result<Option<(Key, Lsn, Value)>> {
self.next_inner().await
}
/// Get the next key-value pair from the iterator, and trace where the key comes from.
pub async fn next_with_trace(
&mut self,
) -> anyhow::Result<Option<((Key, Lsn, Value), Arc<PersistentLayerKey>)>> {
self.next_inner().await
}
}
#[cfg(test)]

View File

@@ -4,7 +4,7 @@
//!
//! The old legacy algorithm is implemented directly in `timeline.rs`.
use std::collections::{BinaryHeap, HashSet};
use std::collections::{BinaryHeap, HashMap, HashSet};
use std::ops::{Deref, Range};
use std::sync::Arc;
@@ -56,7 +56,7 @@ use pageserver_api::value::Value;
use utils::lsn::Lsn;
use pageserver_compaction::helpers::overlaps_with;
use pageserver_compaction::helpers::{fully_contains, overlaps_with};
use pageserver_compaction::interface::*;
use super::CompactionError;
@@ -64,6 +64,23 @@ use super::CompactionError;
/// Maximum number of deltas before generating an image layer in bottom-most compaction.
const COMPACTION_DELTA_THRESHOLD: usize = 5;
pub struct GcCompactionJobDescription {
/// All layers to read in the compaction job
selected_layers: Vec<Layer>,
/// GC cutoff of the job
gc_cutoff: Lsn,
/// LSNs to retain for the job
retain_lsns_below_horizon: Vec<Lsn>,
/// Maximum layer LSN processed in this compaction
max_layer_lsn: Lsn,
/// Only compact layers overlapping with this range
compaction_key_range: Range<Key>,
/// When partial compaction is enabled, these layers need to be rewritten to ensure no overlap.
/// This field is here solely for debugging. The field will not be read once the compaction
/// description is generated.
rewrite_layers: Vec<Arc<PersistentLayerDesc>>,
}
/// The result of bottom-most compaction for a single key at each LSN.
#[derive(Debug)]
#[cfg_attr(test, derive(PartialEq))]
@@ -1722,7 +1739,8 @@ impl Timeline {
flags: EnumSet<CompactFlags>,
ctx: &RequestContext,
) -> anyhow::Result<()> {
self.partial_compact_with_gc(None, cancel, flags, ctx).await
self.partial_compact_with_gc(Key::MIN..Key::MAX, cancel, flags, ctx)
.await
}
/// An experimental compaction building block that combines compaction with garbage collection.
@@ -1732,12 +1750,15 @@ impl Timeline {
/// layers and image layers, which generates image layers on the gc horizon, drop deltas below gc horizon,
/// and create delta layers with all deltas >= gc horizon.
///
/// If `key_range` is set, it will only compact the keys within the range, aka partial compaction. This functionality
/// is not complete yet, and if it is set, only image layers will be generated.
///
/// If `key_range` is provided, it will only compact the keys within the range, aka partial compaction.
/// Partial compaction will read and process all layers overlapping with the key range, even if it might
/// contain extra keys. After the gc-compaction phase completes, delta layers that are not fully contained
/// within the key range will be rewritten to ensure they do not overlap with the newly produced delta layers. Providing
/// Key::MIN..Key::MAX to the function indicates a full compaction, though technically, `Key::MAX` is not
/// part of the range.
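/// Example (sketch): `partial_compact_with_gc(get_key(0)..get_key(2), &cancel, EnumSet::new(), &ctx)`
/// compacts only that key range, while passing `Key::MIN..Key::MAX` requests a full compaction.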
pub(crate) async fn partial_compact_with_gc(
self: &Arc<Self>,
compaction_key_range: Option<Range<Key>>,
compaction_key_range: Range<Key>,
cancel: &CancellationToken,
flags: EnumSet<CompactFlags>,
ctx: &RequestContext,
@@ -1762,9 +1783,8 @@ impl Timeline {
.await?;
let dry_run = flags.contains(CompactFlags::DryRun);
let partial_compaction = compaction_key_range.is_some();
if let Some(ref compaction_key_range) = compaction_key_range {
if compaction_key_range == (Key::MIN..Key::MAX) {
info!("running enhanced gc bottom-most compaction, dry_run={dry_run}, compaction_key_range={}..{}", compaction_key_range.start, compaction_key_range.end);
} else {
info!("running enhanced gc bottom-most compaction, dry_run={dry_run}");
@@ -1780,7 +1800,7 @@ impl Timeline {
// The layer selection has the following properties:
// 1. If a layer is in the selection, all layers below it are in the selection.
// 2. Inferred from (1), for each key in the layer selection, the value can be reconstructed only with the layers in the layer selection.
let (layer_selection, gc_cutoff, retain_lsns_below_horizon) = if !partial_compaction {
let job_desc = {
let guard = self.layers.read().await;
let layers = guard.layer_map()?;
let gc_info = self.gc_info.read().unwrap();
@@ -1810,9 +1830,21 @@ impl Timeline {
};
// Then, pick all the layers that are below the max_layer_lsn. This is to ensure we can pick all single-key
// layers to compact.
let mut rewrite_layers = Vec::new();
for desc in layers.iter_historic_layers() {
if desc.get_lsn_range().end <= max_layer_lsn {
if desc.get_lsn_range().end <= max_layer_lsn
&& overlaps_with(&desc.get_key_range(), &compaction_key_range)
{
// If the layer overlaps with the compaction key range, we need to read it to obtain all keys within the range,
// even if it might contain extra keys
selected_layers.push(guard.get_from_desc(&desc));
// If the layer is not fully contained within the key range, we need to rewrite it if it's a delta layer (it's fine
// to overlap image layers)
if desc.is_delta()
&& !fully_contains(&compaction_key_range, &desc.get_key_range())
{
rewrite_layers.push(desc);
}
}
}
if selected_layers.is_empty() {
@@ -1820,82 +1852,59 @@ impl Timeline {
return Ok(());
}
retain_lsns_below_horizon.sort();
(selected_layers, gc_cutoff, retain_lsns_below_horizon)
} else {
// In case of partial compaction, we currently only support generating image layers, and therefore,
// we pick all layers that are below the lowest retain_lsn and do not intersect with any of the layers.
let guard = self.layers.read().await;
let layers = guard.layer_map()?;
let gc_info = self.gc_info.read().unwrap();
let mut min_lsn = gc_info.cutoffs.select_min();
for (lsn, _, _) in &gc_info.retain_lsns {
if lsn < &min_lsn {
min_lsn = *lsn;
}
GcCompactionJobDescription {
selected_layers,
gc_cutoff,
retain_lsns_below_horizon,
max_layer_lsn,
compaction_key_range,
rewrite_layers,
}
for lsn in gc_info.leases.keys() {
if lsn < &min_lsn {
min_lsn = *lsn;
}
}
let mut selected_layers = Vec::new();
drop(gc_info);
// |-------| |-------| |-------|
// | Delta | | Delta | | Delta | -- min_lsn could be intersecting with the layers
// |-------| |-------| |-------| <- we want to pick all the layers below min_lsn, so that
// | Delta | | Delta | | Delta | ...we can remove them after compaction
// |-------| |-------| |-------|
// Pick all the layers intersect or below the min_lsn, get the largest LSN in the selected layers.
let Some(compaction_key_range) = compaction_key_range.as_ref() else {
unreachable!()
};
for desc in layers.iter_historic_layers() {
if desc.get_lsn_range().end <= min_lsn
&& overlaps_with(&desc.key_range, compaction_key_range)
{
selected_layers.push(guard.get_from_desc(&desc));
}
}
if selected_layers.is_empty() {
info!("no layers to compact with gc");
return Ok(());
}
(selected_layers, min_lsn, Vec::new())
};
let lowest_retain_lsn = if self.ancestor_timeline.is_some() {
if partial_compaction {
warn!("partial compaction cannot run on child branches (for now)");
return Ok(());
}
Lsn(self.ancestor_lsn.0 + 1)
} else {
let res = retain_lsns_below_horizon
let res = job_desc
.retain_lsns_below_horizon
.first()
.copied()
.unwrap_or(gc_cutoff);
.unwrap_or(job_desc.gc_cutoff);
if cfg!(debug_assertions) {
assert_eq!(
res,
retain_lsns_below_horizon
job_desc
.retain_lsns_below_horizon
.iter()
.min()
.copied()
.unwrap_or(gc_cutoff)
.unwrap_or(job_desc.gc_cutoff)
);
}
res
};
info!(
"picked {} layers for compaction with gc_cutoff={} lowest_retain_lsn={}",
layer_selection.len(),
gc_cutoff,
lowest_retain_lsn
"picked {} layers for compaction ({} layers need rewriting) with max_layer_lsn={} gc_cutoff={} lowest_retain_lsn={}, key_range={}..{}",
job_desc.selected_layers.len(),
job_desc.rewrite_layers.len(),
job_desc.max_layer_lsn,
job_desc.gc_cutoff,
lowest_retain_lsn,
job_desc.compaction_key_range.start,
job_desc.compaction_key_range.end
);
self.check_compaction_space(&layer_selection).await?;
for layer in &job_desc.selected_layers {
debug!("read layer: {}", layer.layer_desc().key());
}
for layer in &job_desc.rewrite_layers {
debug!("rewrite layer: {}", layer.key());
}
self.check_compaction_space(&job_desc.selected_layers)
.await?;
// Generate statistics for the compaction
for layer in &layer_selection {
for layer in &job_desc.selected_layers {
let desc = layer.layer_desc();
if desc.is_delta() {
stat.visit_delta_layer(desc.file_size());
@@ -1906,25 +1915,25 @@ impl Timeline {
// Step 1: construct a k-merge iterator over all layers.
// Also, verify if the layer map can be split by drawing a horizontal line at every LSN start/end split point.
let layer_names: Vec<crate::tenant::storage_layer::LayerName> = layer_selection
let layer_names = job_desc
.selected_layers
.iter()
.map(|layer| layer.layer_desc().layer_name())
.collect_vec();
if let Some(err) = check_valid_layermap(&layer_names) {
bail!("cannot run gc-compaction because {}", err);
warn!("gc-compaction layer map check failed because {}, this is normal if partial compaction is not finished yet", err);
}
// The maximum LSN we are processing in this compaction loop
let end_lsn = layer_selection
let end_lsn = job_desc
.selected_layers
.iter()
.map(|l| l.layer_desc().lsn_range.end)
.max()
.unwrap();
// We don't want any of the produced layers to cover the full key range (i.e., MIN..MAX) b/c it will then be recognized
// as an L0 layer.
let mut delta_layers = Vec::new();
let mut image_layers = Vec::new();
let mut downloaded_layers = Vec::new();
for layer in &layer_selection {
for layer in &job_desc.selected_layers {
let resident_layer = layer.download_and_keep_resident().await?;
downloaded_layers.push(resident_layer);
}
@@ -1943,8 +1952,8 @@ impl Timeline {
dense_ks,
sparse_ks,
)?;
// Step 2: Produce images+deltas. TODO: ensure newly-produced delta does not overlap with other deltas.
// Data of the same key.
// Step 2: Produce images+deltas.
let mut accumulated_values = Vec::new();
let mut last_key: Option<Key> = None;
@@ -1956,10 +1965,7 @@ impl Timeline {
self.conf,
self.timeline_id,
self.tenant_shard_id,
compaction_key_range
.as_ref()
.map(|x| x.start)
.unwrap_or(Key::MIN),
job_desc.compaction_key_range.start,
lowest_retain_lsn,
self.get_compaction_target_size(),
ctx,
@@ -1979,6 +1985,13 @@ impl Timeline {
)
.await?;
#[derive(Default)]
struct RewritingLayers {
before: Option<DeltaLayerWriter>,
after: Option<DeltaLayerWriter>,
}
let mut delta_layer_rewriters = HashMap::<Arc<PersistentLayerKey>, RewritingLayers>::new();
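// Sketch of the rewrite plumbing: keys falling outside compaction_key_range are routed
// into `before` (key < range start) or `after` (key >= range end) writers, one pair per
// source delta layer, keyed by that layer's PersistentLayerKey.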
/// Returns None if there is no ancestor branch. Throw an error when the key is not found.
///
/// Currently, we always get the ancestor image for each key in the child branch no matter whether the image
@@ -2004,10 +2017,51 @@ impl Timeline {
// the key and LSN range are determined. However, to keep things simple here, we still
// create this writer, and discard the writer in the end.
while let Some((key, lsn, val)) = merge_iter.next().await? {
while let Some(((key, lsn, val), desc)) = merge_iter.next_with_trace().await? {
if cancel.is_cancelled() {
return Err(anyhow!("cancelled")); // TODO: refactor to CompactionError and pass cancel error
}
if !job_desc.compaction_key_range.contains(&key) {
if !desc.is_delta {
continue;
}
let rewriter = delta_layer_rewriters.entry(desc.clone()).or_default();
let rewriter = if key < job_desc.compaction_key_range.start {
if rewriter.before.is_none() {
rewriter.before = Some(
DeltaLayerWriter::new(
self.conf,
self.timeline_id,
self.tenant_shard_id,
desc.key_range.start,
desc.lsn_range.clone(),
ctx,
)
.await?,
);
}
rewriter.before.as_mut().unwrap()
} else if key >= job_desc.compaction_key_range.end {
if rewriter.after.is_none() {
rewriter.after = Some(
DeltaLayerWriter::new(
self.conf,
self.timeline_id,
self.tenant_shard_id,
job_desc.compaction_key_range.end,
desc.lsn_range.clone(),
ctx,
)
.await?,
);
}
rewriter.after.as_mut().unwrap()
} else {
unreachable!()
};
rewriter.put_value(key, lsn, val, ctx).await?;
continue;
}
match val {
Value::Image(_) => stat.visit_image_key(&val),
Value::WalRecord(_) => stat.visit_wal_key(&val),
@@ -2018,35 +2072,27 @@ impl Timeline {
}
accumulated_values.push((key, lsn, val));
} else {
let last_key = last_key.as_mut().unwrap();
stat.on_unique_key_visited();
let skip_adding_key = if let Some(ref compaction_key_range) = compaction_key_range {
!compaction_key_range.contains(last_key)
} else {
false
};
if !skip_adding_key {
let retention = self
.generate_key_retention(
*last_key,
&accumulated_values,
gc_cutoff,
&retain_lsns_below_horizon,
COMPACTION_DELTA_THRESHOLD,
get_ancestor_image(self, *last_key, ctx).await?,
)
.await?;
// Put the image into the image layer. Currently we have a single big layer for the compaction.
retention
.pipe_to(
*last_key,
&mut delta_layer_writer,
image_layer_writer.as_mut(),
&mut stat,
ctx,
)
.await?;
}
let last_key: &mut Key = last_key.as_mut().unwrap();
stat.on_unique_key_visited(); // TODO: adjust statistics for partial compaction
let retention = self
.generate_key_retention(
*last_key,
&accumulated_values,
job_desc.gc_cutoff,
&job_desc.retain_lsns_below_horizon,
COMPACTION_DELTA_THRESHOLD,
get_ancestor_image(self, *last_key, ctx).await?,
)
.await?;
retention
.pipe_to(
*last_key,
&mut delta_layer_writer,
image_layer_writer.as_mut(),
&mut stat,
ctx,
)
.await?;
accumulated_values.clear();
*last_key = key;
accumulated_values.push((key, lsn, val));
@@ -2057,35 +2103,43 @@ impl Timeline {
let last_key = last_key.expect("no keys produced during compaction");
stat.on_unique_key_visited();
let skip_adding_key = if let Some(ref compaction_key_range) = compaction_key_range {
!compaction_key_range.contains(&last_key)
} else {
false
};
if !skip_adding_key {
let retention = self
.generate_key_retention(
last_key,
&accumulated_values,
gc_cutoff,
&retain_lsns_below_horizon,
COMPACTION_DELTA_THRESHOLD,
get_ancestor_image(self, last_key, ctx).await?,
)
.await?;
// Put the image into the image layer. Currently we have a single big layer for the compaction.
retention
.pipe_to(
last_key,
&mut delta_layer_writer,
image_layer_writer.as_mut(),
&mut stat,
ctx,
)
.await?;
}
let retention = self
.generate_key_retention(
last_key,
&accumulated_values,
job_desc.gc_cutoff,
&job_desc.retain_lsns_below_horizon,
COMPACTION_DELTA_THRESHOLD,
get_ancestor_image(self, last_key, ctx).await?,
)
.await?;
retention
.pipe_to(
last_key,
&mut delta_layer_writer,
image_layer_writer.as_mut(),
&mut stat,
ctx,
)
.await?;
// end: move the above part to the loop body
let mut rewrote_delta_layers = Vec::new();
for (key, writers) in delta_layer_rewriters {
if let Some(delta_writer_before) = writers.before {
let (desc, path) = delta_writer_before
.finish(job_desc.compaction_key_range.start, ctx)
.await?;
let layer = Layer::finish_creating(self.conf, self, desc, &path)?;
rewrote_delta_layers.push(layer);
}
if let Some(delta_writer_after) = writers.after {
let (desc, path) = delta_writer_after.finish(key.key_range.end, ctx).await?;
let layer = Layer::finish_creating(self.conf, self, desc, &path)?;
rewrote_delta_layers.push(layer);
}
}
let discard = |key: &PersistentLayerKey| {
let key = key.clone();
async move { KeyHistoryRetention::discard_key(&key, self, dry_run).await }
@@ -2093,10 +2147,7 @@ impl Timeline {
let produced_image_layers = if let Some(writer) = image_layer_writer {
if !dry_run {
let end_key = compaction_key_range
.as_ref()
.map(|x| x.end)
.unwrap_or(Key::MAX);
let end_key = job_desc.compaction_key_range.end;
writer
.finish_with_discard_fn(self, ctx, end_key, discard)
.await?
@@ -2117,10 +2168,8 @@ impl Timeline {
Vec::new()
};
if partial_compaction && !produced_delta_layers.is_empty() {
bail!("implementation error: partial compaction should not be producing delta layers (for now)");
}
// TODO: make image/delta/rewrote_delta layers generation atomic. At this point, we already generated resident layers, and if
// compaction is cancelled at this point, we might have some layers that are not cleaned up.
let mut compact_to = Vec::new();
let mut keep_layers = HashSet::new();
let produced_delta_layers_len = produced_delta_layers.len();
@@ -2128,52 +2177,84 @@ impl Timeline {
for action in produced_delta_layers {
match action {
BatchWriterResult::Produced(layer) => {
if cfg!(debug_assertions) {
info!("produced delta layer: {}", layer.layer_desc().key());
}
stat.produce_delta_layer(layer.layer_desc().file_size());
compact_to.push(layer);
}
BatchWriterResult::Discarded(l) => {
if cfg!(debug_assertions) {
info!("discarded delta layer: {}", l);
}
keep_layers.insert(l);
stat.discard_delta_layer();
}
}
}
for layer in &rewrote_delta_layers {
debug!(
"produced rewritten delta layer: {}",
layer.layer_desc().key()
);
}
compact_to.extend(rewrote_delta_layers);
for action in produced_image_layers {
match action {
BatchWriterResult::Produced(layer) => {
debug!("produced image layer: {}", layer.layer_desc().key());
stat.produce_image_layer(layer.layer_desc().file_size());
compact_to.push(layer);
}
BatchWriterResult::Discarded(l) => {
debug!("discarded image layer: {}", l);
keep_layers.insert(l);
stat.discard_image_layer();
}
}
}
let mut layer_selection = layer_selection;
layer_selection.retain(|x| !keep_layers.contains(&x.layer_desc().key()));
if let Some(ref compaction_key_range) = compaction_key_range {
// Partial compaction might select more data than it processes, e.g., if
// the compaction_key_range only partially overlaps:
//
// [---compaction_key_range---]
// [---A----][----B----][----C----][----D----]
//
// A,B,C,D are all in the `layer_selection`. The created image layers contain
// whatever is needed from B, C, and from `----]` of A, and from `[--` of D.
//
// In contrast, `[--A-` and `--D----]` have not been processed, so, we must
// keep that data.
//
// The solution for now is to keep A and D completely.
// (layer_selection is what we'll remove from the layer map, so,
// retain what is _not_ fully covered by compaction_key_range).
layer_selection.retain(|x| {
let key_range = &x.layer_desc().key_range;
key_range.start >= compaction_key_range.start
&& key_range.end <= compaction_key_range.end
});
let mut layer_selection = job_desc.selected_layers;
// Partial compaction might select more data than it processes, e.g., if
// the compaction_key_range only partially overlaps:
//
// [---compaction_key_range---]
// [---A----][----B----][----C----][----D----]
//
// For delta layers, we will rewrite them so that they are cut exactly at
// the compaction key range, so we can always discard them. However, for image
// layers, as we do not rewrite them for now, we need to handle them differently.
// Assume image layers A, B, C, D are all in the `layer_selection`.
//
// The created image layers contain whatever is needed from B, C, and from
// `----]` of A, and from `[---` of D.
//
// In contrast, `[---A` and `D----]` have not been processed, so, we must
// keep that data.
//
// The solution for now is to keep A and D completely if they are image layers.
// (layer_selection is what we'll remove from the layer map, so, retain what
// is _not_ fully covered by compaction_key_range).
for layer in &layer_selection {
if !layer.layer_desc().is_delta() {
if !overlaps_with(
&layer.layer_desc().key_range,
&job_desc.compaction_key_range,
) {
bail!("violated constraint: image layer outside of compaction key range");
}
if !fully_contains(
&job_desc.compaction_key_range,
&layer.layer_desc().key_range,
) {
keep_layers.insert(layer.layer_desc().key());
}
}
}
layer_selection.retain(|x| !keep_layers.contains(&x.layer_desc().key()));
info!(
"gc-compaction statistics: {}",
serde_json::to_string(&stat)?
@@ -2192,6 +2273,7 @@ impl Timeline {
// Step 3: Place back to the layer map.
{
// TODO: sanity check if the layer map is valid (i.e., should not have overlaps)
let mut guard = self.layers.write().await;
guard
.open_mut()?
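
The image-layer retention rule in the hunk above is the subtle part of partial compaction. A minimal Python model of just that rule, with a hypothetical Layer type and integer key ranges standing in for the pageserver's actual types, may help:

```python
# Hypothetical model of the image-layer retention rule above; integer key
# ranges stand in for the pageserver's Key type.
from dataclasses import dataclass


@dataclass(frozen=True)
class Layer:
    key_start: int
    key_end: int  # exclusive, like a half-open Rust range
    is_delta: bool


def overlaps_with(a: tuple[int, int], b: tuple[int, int]) -> bool:
    return a[0] < b[1] and b[0] < a[1]


def fully_contains(outer: tuple[int, int], inner: tuple[int, int]) -> bool:
    return outer[0] <= inner[0] and inner[1] <= outer[1]


def layers_to_remove(selected: list[Layer], compaction_range: tuple[int, int]) -> list[Layer]:
    keep = set()  # layers that must stay in the layer map
    for layer in selected:
        if not layer.is_delta:
            rng = (layer.key_start, layer.key_end)
            if not overlaps_with(rng, compaction_range):
                raise ValueError("image layer outside of compaction key range")
            # Image layers straddling a range boundary (A and D in the diagram)
            # are not rewritten, so they must be kept.
            if not fully_contains(compaction_range, rng):
                keep.add(layer)
    # Delta layers are rewritten exactly at the boundary, so they are always removable.
    return [layer for layer in selected if layer not in keep]


# A=[0,10) straddles the range start and survives; B=[10,20) is fully covered.
sel = [Layer(0, 10, False), Layer(10, 20, False), Layer(0, 30, True)]
assert layers_to_remove(sel, (5, 25)) == [Layer(10, 20, False), Layer(0, 30, True)]
```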

View File

@@ -1,10 +1,8 @@
from __future__ import annotations
import enum
import os
from typing import TYPE_CHECKING
import pytest
from typing_extensions import override
if TYPE_CHECKING:
@@ -18,12 +16,15 @@ This fixture is used to determine which version of Postgres to use for tests.
# Inherit PgVersion from str rather than int to make it easier to pass as a command-line argument
# TODO: use enum.StrEnum for Python >= 3.11
@enum.unique
class PgVersion(str, enum.Enum):
V14 = "14"
V15 = "15"
V16 = "16"
V17 = "17"
# Default Postgres Version for tests that don't really depend on Postgres itself
DEFAULT = V16
# Instead of making version an optional parameter in methods, we can use this fake entry
# to explicitly rely on the default server version (could be different from pg_version fixture value)
NOT_SET = "<-POSTRGRES VERSION IS NOT SET->"
@@ -59,27 +60,3 @@ class PgVersion(str, enum.Enum):
# Make mypy happy
# See https://github.com/python/mypy/issues/3974
return None
DEFAULT_VERSION: PgVersion = PgVersion.V16
def skip_on_postgres(version: PgVersion, reason: str):
return pytest.mark.skipif(
PgVersion(os.environ.get("DEFAULT_PG_VERSION", DEFAULT_VERSION)) is version,
reason=reason,
)
def xfail_on_postgres(version: PgVersion, reason: str):
return pytest.mark.xfail(
PgVersion(os.environ.get("DEFAULT_PG_VERSION", DEFAULT_VERSION)) is version,
reason=reason,
)
def run_only_on_default_postgres(reason: str):
return pytest.mark.skipif(
PgVersion(os.environ.get("DEFAULT_PG_VERSION", DEFAULT_VERSION)) is not DEFAULT_VERSION,
reason=reason,
)

View File

@@ -25,6 +25,7 @@ from fixtures.pageserver.common_types import (
parse_delta_layer,
parse_image_layer,
)
from fixtures.pg_version import PgVersion
if TYPE_CHECKING:
from collections.abc import Iterable
@@ -37,6 +38,7 @@ if TYPE_CHECKING:
Fn = TypeVar("Fn", bound=Callable[..., Any])
COMPONENT_BINARIES = {
"storage_controller": ("storage_controller",),
"storage_broker": ("storage_broker",),
@@ -519,7 +521,7 @@ def assert_pageserver_backups_equal(left: Path, right: Path, skip_files: set[str
This is essentially:
lines=$(comm -3 \
<(mkdir left && cd left && tar xf "$left" && find . -type f -print0 | xargs sha256sum | sort -k2) \
<(mkdir left && cd left && tar xf "$left" && find . -type f -print0 | xargs sha256sum | sort -k2) \
<(mkdir right && cd right && tar xf "$right" && find . -type f -print0 | xargs sha256sum | sort -k2) \
| wc -l)
[ "$lines" = "0" ]
@@ -643,3 +645,40 @@ def allpairs_versions():
)
ids.append(f"combination_{''.join(cur_id)}")
return {"argnames": "combination", "argvalues": tuple(argvalues), "ids": ids}
def skip_on_postgres(version: PgVersion, reason: str):
return pytest.mark.skipif(
PgVersion(os.getenv("DEFAULT_PG_VERSION", PgVersion.DEFAULT)) is version,
reason=reason,
)
def xfail_on_postgres(version: PgVersion, reason: str):
return pytest.mark.xfail(
PgVersion(os.getenv("DEFAULT_PG_VERSION", PgVersion.DEFAULT)) is version,
reason=reason,
)
def run_only_on_default_postgres(reason: str):
return pytest.mark.skipif(
PgVersion(os.getenv("DEFAULT_PG_VERSION", PgVersion.DEFAULT)) is not PgVersion.DEFAULT,
reason=reason,
)
def skip_in_debug_build(reason: str):
return pytest.mark.skipif(
os.getenv("BUILD_TYPE", "debug") == "debug",
reason=reason,
)
def skip_on_ci(reason: str):
# `CI` variable is always set to `true` on GitHub
# Ref: https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#default-environment-variables
return pytest.mark.skipif(
os.getenv("CI", "false") == "true",
reason=reason,
)
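
Taken together, the relocated helpers read like this at a call site. A sketch only: the test names, bodies, and reasons here are made up, but the decorator signatures match the definitions above:

```python
# Illustrative call sites for the relocated helpers (hypothetical tests).
from fixtures.pg_version import PgVersion
from fixtures.utils import skip_in_debug_build, skip_on_ci, skip_on_postgres


@skip_on_postgres(PgVersion.V14, reason="uses a feature added in PG15")
def test_needs_recent_postgres():
    ...


@skip_in_debug_build("too slow under debug assertions")
@skip_on_ci("needs dedicated hardware")
def test_heavyweight():
    ...
```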

View File

@@ -1,7 +1,6 @@
from __future__ import annotations
import json
import os
from pathlib import Path
from typing import TYPE_CHECKING
@@ -14,7 +13,7 @@ from fixtures.neon_fixtures import (
PgBin,
wait_for_last_flush_lsn,
)
from fixtures.utils import get_scale_for_db, humantime_to_ms
from fixtures.utils import get_scale_for_db, humantime_to_ms, skip_on_ci
from performance.pageserver.util import (
setup_pageserver_with_tenants,
@@ -38,9 +37,8 @@ if TYPE_CHECKING:
@pytest.mark.parametrize("pgbench_scale", [get_scale_for_db(200)])
@pytest.mark.parametrize("n_tenants", [500])
@pytest.mark.timeout(10000)
@pytest.mark.skipif(
os.getenv("CI", "false") == "true",
reason="This test needs lot of resources and should run on dedicated HW, not in github action runners as part of CI",
@skip_on_ci(
"This test needs lot of resources and should run on dedicated HW, not in github action runners as part of CI"
)
def test_pageserver_characterize_throughput_with_n_tenants(
neon_env_builder: NeonEnvBuilder,
@@ -66,9 +64,8 @@ def test_pageserver_characterize_throughput_with_n_tenants(
@pytest.mark.parametrize("n_clients", [1, 64])
@pytest.mark.parametrize("n_tenants", [1])
@pytest.mark.timeout(2400)
@pytest.mark.skipif(
os.getenv("CI", "false") == "true",
reason="This test needs lot of resources and should run on dedicated HW, not in github action runners as part of CI",
@skip_on_ci(
"This test needs lot of resources and should run on dedicated HW, not in github action runners as part of CI"
)
def test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant(
neon_env_builder: NeonEnvBuilder,

View File

@@ -8,7 +8,7 @@ from fixtures.common_types import Lsn, TimelineId
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv
from fixtures.pageserver.http import TimelineCreate406
from fixtures.utils import query_scalar
from fixtures.utils import query_scalar, skip_in_debug_build
# Test the GC implementation when running with branching.
@@ -48,10 +48,8 @@ from fixtures.utils import query_scalar
# Because the delta layer D covering lsn1 is corrupted, creating a branch
# starting from lsn1 should return an error as follows:
# could not find data for key ... at LSN ..., for request at LSN ...
def test_branch_and_gc(neon_simple_env: NeonEnv, build_type: str):
if build_type == "debug":
pytest.skip("times out in debug builds")
@skip_in_debug_build("times out in debug builds")
def test_branch_and_gc(neon_simple_env: NeonEnv):
env = neon_simple_env
pageserver_http_client = env.pageserver.http_client()

View File

@@ -2,7 +2,6 @@ from __future__ import annotations
import enum
import json
import os
import time
from typing import TYPE_CHECKING
@@ -13,7 +12,7 @@ from fixtures.neon_fixtures import (
generate_uploads_and_deletions,
)
from fixtures.pageserver.http import PageserverApiException
from fixtures.utils import wait_until
from fixtures.utils import skip_in_debug_build, wait_until
from fixtures.workload import Workload
if TYPE_CHECKING:
@@ -32,7 +31,7 @@ AGGRESIVE_COMPACTION_TENANT_CONF = {
}
@pytest.mark.skipif(os.environ.get("BUILD_TYPE") == "debug", reason="only run with release build")
@skip_in_debug_build("only run with release build")
def test_pageserver_compaction_smoke(neon_env_builder: NeonEnvBuilder):
"""
This is a smoke test that compaction kicks in. The workload repeatedly churns

View File

@@ -12,6 +12,7 @@ from fixtures.neon_fixtures import (
NeonEnvBuilder,
)
from fixtures.pg_version import PgVersion
from fixtures.utils import skip_on_postgres
from pytest_httpserver import HTTPServer
from werkzeug.wrappers.request import Request
from werkzeug.wrappers.response import Response
@@ -41,17 +42,14 @@ def neon_env_builder_local(
return neon_env_builder
@skip_on_postgres(PgVersion.V16, reason="TODO: PG16 extension building")
@skip_on_postgres(PgVersion.V17, reason="TODO: PG17 extension building")
def test_remote_extensions(
httpserver: HTTPServer,
neon_env_builder_local: NeonEnvBuilder,
httpserver_listen_address,
pg_version,
):
if pg_version == PgVersion.V16:
pytest.skip("TODO: PG16 extension building")
if pg_version == PgVersion.V17:
pytest.skip("TODO: PG17 extension building")
# setup mock http server
# that expects request for anon.tar.zst
# and returns the requested file

View File

@@ -4,25 +4,22 @@ from collections.abc import Iterable
from dataclasses import dataclass
from typing import TYPE_CHECKING
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder, wait_for_last_flush_lsn
from fixtures.pageserver.http import HistoricLayerInfo, LayerMapInfo
from fixtures.utils import human_bytes
from fixtures.utils import human_bytes, skip_in_debug_build
if TYPE_CHECKING:
from typing import Union
def test_ingesting_large_batches_of_images(neon_env_builder: NeonEnvBuilder, build_type: str):
@skip_in_debug_build("debug run is unnecessarily slow")
def test_ingesting_large_batches_of_images(neon_env_builder: NeonEnvBuilder):
"""
    Build a non-small GIN index, whose WAL stream batches up images much like pgvector does,
    to show that we no longer create oversized layers.
"""
if build_type == "debug":
pytest.skip("debug run is unnecessarily slow")
minimum_initdb_size = 20 * 1024**2
checkpoint_distance = 32 * 1024**2
minimum_good_layer_size = checkpoint_distance * 0.9

View File

@@ -2,7 +2,6 @@ from __future__ import annotations
import os
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnvBuilder,
@@ -10,12 +9,18 @@ from fixtures.neon_fixtures import (
wait_for_last_flush_lsn,
)
from fixtures.pg_version import PgVersion
from fixtures.utils import skip_on_postgres
@skip_on_postgres(
PgVersion.V14,
reason="pg_log_standby_snapshot() function is available since Postgres 16",
)
@skip_on_postgres(
PgVersion.V15,
reason="pg_log_standby_snapshot() function is available since Postgres 16",
)
def test_layer_bloating(neon_env_builder: NeonEnvBuilder, vanilla_pg):
if neon_env_builder.pg_version != PgVersion.V16:
pytest.skip("pg_log_standby_snapshot() function is available only in PG16")
env = neon_env_builder.init_start(
initial_tenant_conf={
"gc_period": "0s",

View File

@@ -2,7 +2,6 @@ from __future__ import annotations
import time
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnvBuilder,
@@ -12,17 +11,13 @@ from fixtures.neon_fixtures import (
from fixtures.pageserver.common_types import parse_layer_file_name
from fixtures.pageserver.utils import wait_for_upload
from fixtures.remote_storage import RemoteStorageKind
from fixtures.utils import skip_in_debug_build
# Creates a few layers, ensures that we can evict them (removing locally but keeping track of them anyway)
# and then download them back.
def test_basic_eviction(
neon_env_builder: NeonEnvBuilder,
build_type: str,
):
if build_type == "debug":
pytest.skip("times out in debug builds")
@skip_in_debug_build("times out in debug builds")
def test_basic_eviction(neon_env_builder: NeonEnvBuilder):
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
env = neon_env_builder.init_start(

View File

@@ -5,8 +5,7 @@ import uuid
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.pg_version import run_only_on_default_postgres
from fixtures.utils import wait_until
from fixtures.utils import run_only_on_default_postgres, wait_until
@pytest.mark.parametrize("level", ["trace", "debug", "info", "warn", "error"])

View File

@@ -1,6 +1,5 @@
from __future__ import annotations
import os
import subprocess
from pathlib import Path
from typing import cast
@@ -15,7 +14,7 @@ from fixtures.neon_fixtures import (
parse_project_git_version_output,
)
from fixtures.pageserver.http import PageserverHttpClient
from fixtures.pg_version import PgVersion, skip_on_postgres
from fixtures.utils import run_only_on_default_postgres, skip_in_debug_build
def helper_compare_timeline_list(
@@ -195,10 +194,8 @@ def test_cli_start_stop_multi(neon_env_builder: NeonEnvBuilder):
res.check_returncode()
@skip_on_postgres(PgVersion.V14, reason="does not use postgres")
@pytest.mark.skipif(
os.environ.get("BUILD_TYPE") == "debug", reason="unit test for test support, either build works"
)
@run_only_on_default_postgres(reason="does not use postgres")
@skip_in_debug_build("unit test for test support, either build works")
def test_parse_project_git_version_output_positive():
commit = "b6f77b5816cf1dba12a3bc8747941182ce220846"
@@ -217,10 +214,8 @@ def test_parse_project_git_version_output_positive():
assert parse_project_git_version_output(example) == commit
@skip_on_postgres(PgVersion.V14, reason="does not use postgres")
@pytest.mark.skipif(
os.environ.get("BUILD_TYPE") == "debug", reason="unit test for test support, either build works"
)
@run_only_on_default_postgres(reason="does not use postgres")
@skip_in_debug_build("unit test for test support, either build works")
def test_parse_project_git_version_output_local_docker():
"""
Makes sure the tests don't accept the default version in Dockerfile one gets without providing
@@ -234,10 +229,8 @@ def test_parse_project_git_version_output_local_docker():
assert input in str(e)
@skip_on_postgres(PgVersion.V14, reason="does not use postgres")
@pytest.mark.skipif(
os.environ.get("BUILD_TYPE") == "debug", reason="cli api sanity, either build works"
)
@run_only_on_default_postgres(reason="does not use postgres")
@skip_in_debug_build("unit test for test support, either build works")
def test_binaries_version_parses(neon_binpath: Path):
"""
Ensures that we can parse the actual outputs of --version from a set of binaries.

View File

@@ -1,7 +1,6 @@
from __future__ import annotations
import asyncio
import os
import time
from typing import TYPE_CHECKING
@@ -16,7 +15,7 @@ from fixtures.neon_fixtures import (
)
from fixtures.pageserver.http import PageserverHttpClient
from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload
from fixtures.utils import wait_until
from fixtures.utils import skip_in_debug_build, wait_until
if TYPE_CHECKING:
from typing import Optional
@@ -227,12 +226,9 @@ def test_idle_checkpoints(neon_env_builder: NeonEnvBuilder):
assert get_dirty_bytes(env) >= dirty_after_write
@pytest.mark.skipif(
# We have to use at least ~100MB of data to hit the lowest limit we can configure, which is
# prohibitively slow in debug mode
os.getenv("BUILD_TYPE") == "debug",
reason="Avoid running bulkier ingest tests in debug mode",
)
# We have to use at least ~100MB of data to hit the lowest limit we can configure, which is
# prohibitively slow in debug mode
@skip_in_debug_build("Avoid running bulkier ingest tests in debug mode")
def test_total_size_limit(neon_env_builder: NeonEnvBuilder):
"""
Test that checkpoints are done based on total ephemeral layer size, even if no one timeline is

View File

@@ -8,7 +8,7 @@ import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.remote_storage import s3_storage
from fixtures.utils import wait_until
from fixtures.utils import skip_in_debug_build, wait_until
# Test restarting page server, while safekeeper and compute node keep
@@ -155,12 +155,8 @@ def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):
# safekeeper and compute node keep running.
@pytest.mark.timeout(540)
@pytest.mark.parametrize("shard_count", [None, 4])
def test_pageserver_chaos(
neon_env_builder: NeonEnvBuilder, build_type: str, shard_count: Optional[int]
):
if build_type == "debug":
pytest.skip("times out in debug builds")
@skip_in_debug_build("times out in debug builds")
def test_pageserver_chaos(neon_env_builder: NeonEnvBuilder, shard_count: Optional[int]):
# same rationale as with the immediate stop; we might leave orphan layers behind.
neon_env_builder.disable_scrub_on_exit()
neon_env_builder.enable_pageserver_remote_storage(s3_storage())

View File

@@ -17,7 +17,7 @@ from fixtures.pageserver.utils import (
wait_for_upload_queue_empty,
)
from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind, S3Storage, s3_storage
from fixtures.utils import wait_until
from fixtures.utils import skip_in_debug_build, wait_until
from fixtures.workload import Workload
from werkzeug.wrappers.request import Request
from werkzeug.wrappers.response import Response
@@ -765,7 +765,7 @@ def test_secondary_background_downloads(neon_env_builder: NeonEnvBuilder):
assert download_rate < expect_download_rate * 2
@pytest.mark.skipif(os.environ.get("BUILD_TYPE") == "debug", reason="only run with release build")
@skip_in_debug_build("only run with release build")
@pytest.mark.parametrize("via_controller", [True, False])
def test_slow_secondary_downloads(neon_env_builder: NeonEnvBuilder, via_controller: bool):
"""

View File

@@ -3,7 +3,6 @@
#
from __future__ import annotations
import os
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import TYPE_CHECKING, cast
@@ -19,6 +18,7 @@ from fixtures.neon_fixtures import (
)
from fixtures.pg_version import PgVersion
from fixtures.remote_storage import s3_storage
from fixtures.utils import skip_in_debug_build
if TYPE_CHECKING:
from typing import Optional
@@ -329,7 +329,7 @@ def test_sql_regress(
post_checks(env, test_output_dir, DBNAME, endpoint)
@pytest.mark.skipif(os.environ.get("BUILD_TYPE") == "debug", reason="only run with release build")
@skip_in_debug_build("only run with release build")
def test_tx_abort_with_many_relations(
neon_env_builder: NeonEnvBuilder,
):

View File

@@ -30,7 +30,7 @@ import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, wait_for_last_flush_lsn, wait_replica_caughtup
from fixtures.pg_version import PgVersion
from fixtures.utils import query_scalar, wait_until
from fixtures.utils import query_scalar, skip_on_postgres, wait_until
CREATE_SUBXACTS_FUNC = """
create or replace function create_subxacts(n integer) returns void as $$
@@ -137,6 +137,12 @@ def test_replica_start_scan_clog_crashed_xids(neon_simple_env: NeonEnv):
assert secondary_cur.fetchone() == (1,)
@skip_on_postgres(
PgVersion.V14, reason="pg_log_standby_snapshot() function is available since Postgres 16"
)
@skip_on_postgres(
PgVersion.V15, reason="pg_log_standby_snapshot() function is available since Postgres 16"
)
def test_replica_start_at_running_xacts(neon_simple_env: NeonEnv, pg_version):
"""
Test that starting a replica works right after the primary has
@@ -149,9 +155,6 @@ def test_replica_start_at_running_xacts(neon_simple_env: NeonEnv, pg_version):
"""
env = neon_simple_env
if env.pg_version == PgVersion.V14 or env.pg_version == PgVersion.V15:
pytest.skip("pg_log_standby_snapshot() function is available only in PG16")
primary = env.endpoints.create_start(branch_name="main", endpoint_id="primary")
primary_conn = primary.connect()
primary_cur = primary_conn.cursor()

View File

@@ -20,7 +20,7 @@ from fixtures.neon_fixtures import (
)
from fixtures.pageserver.utils import assert_prefix_empty, assert_prefix_not_empty
from fixtures.remote_storage import s3_storage
from fixtures.utils import wait_until
from fixtures.utils import skip_in_debug_build, wait_until
from fixtures.workload import Workload
from pytest_httpserver import HTTPServer
from typing_extensions import override
@@ -853,12 +853,9 @@ def test_sharding_split_stripe_size(
wait_until(10, 1, assert_restart_notification)
@pytest.mark.skipif(
# The quantity of data isn't huge, but debug can be _very_ slow, and the things we're
# validating in this test don't benefit much from debug assertions.
os.getenv("BUILD_TYPE") == "debug",
reason="Avoid running bulkier ingest tests in debug mode",
)
# The quantity of data isn't huge, but debug can be _very_ slow, and the things we're
# validating in this test don't benefit much from debug assertions.
@skip_in_debug_build("Avoid running bulkier ingest tests in debug mode")
def test_sharding_ingest_layer_sizes(
neon_env_builder: NeonEnvBuilder,
):

View File

@@ -36,11 +36,12 @@ from fixtures.pageserver.utils import (
remote_storage_delete_key,
timeline_delete_wait_completed,
)
from fixtures.pg_version import PgVersion, run_only_on_default_postgres
from fixtures.pg_version import PgVersion
from fixtures.port_distributor import PortDistributor
from fixtures.remote_storage import RemoteStorageKind, s3_storage
from fixtures.storage_controller_proxy import StorageControllerProxy
from fixtures.utils import (
run_only_on_default_postgres,
run_pg_bench_small,
subprocess_capture,
wait_until,

View File

@@ -1,6 +1,5 @@
from __future__ import annotations
import os
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
@@ -21,7 +20,7 @@ from fixtures.pageserver.utils import (
wait_until_tenant_active,
)
from fixtures.pg_version import PgVersion
from fixtures.utils import wait_until
from fixtures.utils import skip_in_debug_build, wait_until
def test_empty_tenant_size(neon_env_builder: NeonEnvBuilder):
@@ -279,7 +278,7 @@ def test_only_heads_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Pa
size_debug_file.write(size_debug)
@pytest.mark.skipif(os.environ.get("BUILD_TYPE") == "debug", reason="only run with release build")
@skip_in_debug_build("only run with release build")
def test_single_branch_get_tenant_size_grows(
neon_env_builder: NeonEnvBuilder, test_output_dir: Path, pg_version: PgVersion
):

View File

@@ -869,8 +869,17 @@ def test_sharded_timeline_detach_ancestor(neon_env_builder: NeonEnvBuilder):
assert count == 10000
@pytest.mark.parametrize("mode", ["delete_timeline", "delete_tenant"])
@pytest.mark.parametrize("sharded", [False, True])
@pytest.mark.parametrize(
"mode, sharded",
[
("delete_timeline", False),
("delete_timeline", True),
("delete_tenant", False),
# the shared/exclusive lock for tenant is blocking this:
# timeline detach ancestor takes shared, delete tenant takes exclusive
# ("delete_tenant", True)
],
)
def test_timeline_detach_ancestor_interrupted_by_deletion(
neon_env_builder: NeonEnvBuilder, mode: str, sharded: bool
):
@@ -885,11 +894,6 @@ def test_timeline_detach_ancestor_interrupted_by_deletion(
- shutdown winning over complete, see test_timeline_is_deleted_before_timeline_detach_ancestor_completes
"""
if sharded and mode == "delete_tenant":
# the shared/exclusive lock for tenant is blocking this:
# timeline detach ancestor takes shared, delete tenant takes exclusive
pytest.skip("tenant deletion while timeline ancestor detach is underway cannot happen")
shard_count = 2 if sharded else 1
neon_env_builder.num_pageservers = shard_count
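
As an aside on the parametrize change above: an alternative to dropping the unsupported pair from the grid entirely is to keep it visible but marked, via pytest.param, so it shows up in reports as skipped rather than vanishing from collection. A sketch of that variant (not what this suite does):

```python
# Variant (not what the suite does): keep the pair in the grid, marked skipped.
import pytest


@pytest.mark.parametrize(
    "mode, sharded",
    [
        ("delete_timeline", False),
        ("delete_timeline", True),
        ("delete_tenant", False),
        pytest.param(
            "delete_tenant",
            True,
            marks=pytest.mark.skip(reason="shared/exclusive tenant lock conflict"),
        ),
    ],
)
def test_detach_vs_deletion_variant(mode: str, sharded: bool):
    ...
```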

View File

@@ -54,6 +54,8 @@ from fixtures.utils import (
PropagatingThread,
get_dir_size,
query_scalar,
run_only_on_default_postgres,
skip_in_debug_build,
start_in_background,
wait_until,
)
@@ -2104,10 +2106,9 @@ def test_pull_timeline_while_evicted(neon_env_builder: NeonEnvBuilder):
# The only way to verify this without manipulating time is to sleep for a while.
# In this test we sleep for 60 seconds, so this test takes at least 1 minute to run.
# This is longer than most other tests, we run it only for v16 to save CI resources.
@run_only_on_default_postgres("run only on default Postgres to save CI resources")
@skip_in_debug_build("run only on release build to save CI resources")
def test_idle_reconnections(neon_env_builder: NeonEnvBuilder):
if os.environ.get("PYTEST_CURRENT_TEST", "").find("[debug-pg16]") == -1:
pytest.skip("run only on debug postgres v16 to save CI resources")
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()

View File

@@ -14,6 +14,7 @@ from fixtures.common_types import Lsn, TenantId, TimelineId
from fixtures.log_helper import getLogger
from fixtures.neon_fixtures import Endpoint, NeonEnv, NeonEnvBuilder, Safekeeper
from fixtures.remote_storage import RemoteStorageKind
from fixtures.utils import skip_in_debug_build
if TYPE_CHECKING:
from typing import Optional
@@ -760,10 +761,8 @@ async def run_wal_lagging(env: NeonEnv, endpoint: Endpoint, test_output_dir: Pat
# The test takes more than default 5 minutes on Postgres 16,
# see https://github.com/neondatabase/neon/issues/5305
@pytest.mark.timeout(600)
@skip_in_debug_build("times out in debug builds")
def test_wal_lagging(neon_env_builder: NeonEnvBuilder, test_output_dir: Path, build_type: str):
if build_type == "debug":
pytest.skip("times out in debug builds")
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.init_start()