From 982cb1c15da7d7002dfb2eafbb2d45c665f3bf76 Mon Sep 17 00:00:00 2001 From: Peter Bendel Date: Tue, 19 Nov 2024 10:46:46 +0100 Subject: [PATCH 01/22] Move logic for ingest benchmark from GitHub workflow into python testcase (#9762) ## Problem The first version of the ingest benchmark had some parsing and reporting logic in shell script inside GitHub workflow. it is better to move that logic into a python testcase so that we can also run it locally. ## Summary of changes - Create new python testcase - invoke pgcopydb inside python test case - move the following logic into python testcase - determine backpressure - invoke pgcopydb and report its progress - parse pgcopydb log and extract metrics - insert metrics into perf test database - add additional column to perf test database that can receive endpoint ID used for pgcopydb run to have it available in grafana dashboard when retrieving other metrics for an endpoint ## Example run https://github.com/neondatabase/neon/actions/runs/11860622170/job/33056264386 --- .../actions/run-python-test-set/action.yml | 11 + .github/workflows/benchmarking.yml | 1 + .github/workflows/ingest_benchmark.yml | 277 ++---------------- scripts/ingest_perf_test_result.py | 10 +- test_runner/fixtures/benchmark_fixture.py | 6 + .../test_perf_ingest_using_pgcopydb.py | 267 +++++++++++++++++ 6 files changed, 324 insertions(+), 248 deletions(-) create mode 100644 test_runner/performance/test_perf_ingest_using_pgcopydb.py diff --git a/.github/actions/run-python-test-set/action.yml b/.github/actions/run-python-test-set/action.yml index 037b9aeb1e..275f161019 100644 --- a/.github/actions/run-python-test-set/action.yml +++ b/.github/actions/run-python-test-set/action.yml @@ -48,6 +48,10 @@ inputs: description: 'benchmark durations JSON' required: false default: '{}' + aws_oicd_role_arn: + description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role' + required: false + default: '' runs: using: "composite" @@ -222,6 +226,13 @@ runs: # (for example if we didn't run the test for non build-and-test workflow) skip-if-does-not-exist: true + - name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test + if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }} + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ inputs.aws_oicd_role_arn }} + role-duration-seconds: 3600 # 1 hour should be more than enough to upload report - name: Upload test results if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-store diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 68bc555982..0e3c31ec57 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -133,6 +133,7 @@ jobs: --ignore test_runner/performance/test_perf_pgvector_queries.py --ignore test_runner/performance/test_logical_replication.py --ignore test_runner/performance/test_physical_replication.py + --ignore test_runner/performance/test_perf_ingest_using_pgcopydb.py env: BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" diff --git a/.github/workflows/ingest_benchmark.yml b/.github/workflows/ingest_benchmark.yml index d770bb2bb5..1033dc6489 100644 --- a/.github/workflows/ingest_benchmark.yml +++ b/.github/workflows/ingest_benchmark.yml @@ -1,4 +1,4 @@ -name: Benchmarking +name: benchmarking ingest on: # uncomment to run on push for debugging your PR @@ -74,18 +74,16 @@ jobs: compute_units: '[7, 7]' # we want to test large compute here to avoid compute-side bottleneck api_key: ${{ secrets.NEON_STAGING_API_KEY }} - - name: Initialize Neon project and retrieve current backpressure seconds + - name: Initialize Neon project if: ${{ matrix.target_project == 'new_empty_project' }} env: - NEW_PROJECT_CONNSTR: ${{ steps.create-neon-project-ingest-target.outputs.dsn }} + BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-project-ingest-target.outputs.dsn }} NEW_PROJECT_ID: ${{ steps.create-neon-project-ingest-target.outputs.project_id }} run: | echo "Initializing Neon project with project_id: ${NEW_PROJECT_ID}" export LD_LIBRARY_PATH=${PG_16_LIB_PATH} - ${PSQL} "${NEW_PROJECT_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;" - BACKPRESSURE_TIME_BEFORE_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;") - echo "BACKPRESSURE_TIME_BEFORE_INGEST=${BACKPRESSURE_TIME_BEFORE_INGEST}" >> $GITHUB_ENV - echo "NEW_PROJECT_CONNSTR=${NEW_PROJECT_CONNSTR}" >> $GITHUB_ENV + ${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;" + echo "BENCHMARK_INGEST_TARGET_CONNSTR=${BENCHMARK_INGEST_TARGET_CONNSTR}" >> $GITHUB_ENV - name: Create Neon Branch for large tenant if: ${{ matrix.target_project == 'large_existing_project' }} @@ -95,266 +93,55 @@ jobs: project_id: ${{ vars.BENCHMARK_INGEST_TARGET_PROJECTID }} api_key: ${{ secrets.NEON_STAGING_API_KEY }} - - name: Initialize Neon project and retrieve current backpressure seconds + - name: Initialize Neon project if: ${{ matrix.target_project == 'large_existing_project' }} env: - NEW_PROJECT_CONNSTR: ${{ steps.create-neon-branch-ingest-target.outputs.dsn }} + BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-branch-ingest-target.outputs.dsn }} NEW_BRANCH_ID: ${{ steps.create-neon-branch-ingest-target.outputs.branch_id }} run: | echo "Initializing Neon branch with branch_id: ${NEW_BRANCH_ID}" export LD_LIBRARY_PATH=${PG_16_LIB_PATH} # Extract the part before the database name - base_connstr="${NEW_PROJECT_CONNSTR%/*}" + base_connstr="${BENCHMARK_INGEST_TARGET_CONNSTR%/*}" # Extract the query parameters (if any) after the database name - query_params="${NEW_PROJECT_CONNSTR#*\?}" + query_params="${BENCHMARK_INGEST_TARGET_CONNSTR#*\?}" # Reconstruct the new connection string - if [ "$query_params" != "$NEW_PROJECT_CONNSTR" ]; then + if [ "$query_params" != "$BENCHMARK_INGEST_TARGET_CONNSTR" ]; then new_connstr="${base_connstr}/neondb?${query_params}" else new_connstr="${base_connstr}/neondb" fi ${PSQL} "${new_connstr}" -c "drop database ludicrous;" ${PSQL} "${new_connstr}" -c "CREATE DATABASE ludicrous;" - if [ "$query_params" != "$NEW_PROJECT_CONNSTR" ]; then - NEW_PROJECT_CONNSTR="${base_connstr}/ludicrous?${query_params}" + if [ "$query_params" != "$BENCHMARK_INGEST_TARGET_CONNSTR" ]; then + BENCHMARK_INGEST_TARGET_CONNSTR="${base_connstr}/ludicrous?${query_params}" else - NEW_PROJECT_CONNSTR="${base_connstr}/ludicrous" + BENCHMARK_INGEST_TARGET_CONNSTR="${base_connstr}/ludicrous" fi - ${PSQL} "${NEW_PROJECT_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;" - BACKPRESSURE_TIME_BEFORE_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;") - echo "BACKPRESSURE_TIME_BEFORE_INGEST=${BACKPRESSURE_TIME_BEFORE_INGEST}" >> $GITHUB_ENV - echo "NEW_PROJECT_CONNSTR=${NEW_PROJECT_CONNSTR}" >> $GITHUB_ENV - - - - name: Create pgcopydb filter file - run: | - cat << EOF > /tmp/pgcopydb_filter.txt - [include-only-table] - public.events - public.emails - public.email_transmissions - public.payments - public.editions - public.edition_modules - public.sp_content - public.email_broadcasts - public.user_collections - public.devices - public.user_accounts - public.lessons - public.lesson_users - public.payment_methods - public.orders - public.course_emails - public.modules - public.users - public.module_users - public.courses - public.payment_gateway_keys - public.accounts - public.roles - public.payment_gateways - public.management - public.event_names - EOF + ${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;" + echo "BENCHMARK_INGEST_TARGET_CONNSTR=${BENCHMARK_INGEST_TARGET_CONNSTR}" >> $GITHUB_ENV - - name: Invoke pgcopydb + - name: Invoke pgcopydb + uses: ./.github/actions/run-python-test-set + with: + build_type: remote + test_selection: performance/test_perf_ingest_using_pgcopydb.py + run_in_parallel: false + extra_params: -s -m remote_cluster --timeout 86400 -k test_ingest_performance_using_pgcopydb + pg_version: v16 + save_perf_report: true + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: - BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }} - run: | - export LD_LIBRARY_PATH=${PGCOPYDB_LIB_PATH}:${PG_16_LIB_PATH} - export PGCOPYDB_SOURCE_PGURI="${BENCHMARK_INGEST_SOURCE_CONNSTR}" - export PGCOPYDB_TARGET_PGURI="${NEW_PROJECT_CONNSTR}" - export PGOPTIONS="-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7" - ${PG_CONFIG} --bindir - ${PGCOPYDB} --version - ${PGCOPYDB} clone --skip-vacuum --no-owner --no-acl --skip-db-properties --table-jobs 4 \ - --index-jobs 4 --restore-jobs 4 --split-tables-larger-than 10GB --skip-extensions \ - --use-copy-binary --filters /tmp/pgcopydb_filter.txt 2>&1 | tee /tmp/pgcopydb_${{ matrix.target_project }}.log + BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }} + TARGET_PROJECT_TYPE: ${{ matrix.target_project }} + # we report PLATFORM in zenbenchmark NeonBenchmarker perf database and want to distinguish between new project and large tenant + PLATFORM: "${{ matrix.target_project }}-us-east-2-staging" + PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" - # create dummy pgcopydb log to test parsing - # - name: create dummy log for parser test - # run: | - # cat << EOF > /tmp/pgcopydb_${{ matrix.target_project }}.log - # 2024-11-04 18:00:53.433 500861 INFO main.c:136 Running pgcopydb version 0.17.10.g8361a93 from "/usr/lib/postgresql/17/bin/pgcopydb" - # 2024-11-04 18:00:53.434 500861 INFO cli_common.c:1225 [SOURCE] Copying database from "postgres://neondb_owner@ep-bitter-shape-w2c1ir0a.us-east-2.aws.neon.build/neondb?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60" - # 2024-11-04 18:00:53.434 500861 INFO cli_common.c:1226 [TARGET] Copying database into "postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60" - # 2024-11-04 18:00:53.442 500861 INFO copydb.c:105 Using work dir "/tmp/pgcopydb" - # 2024-11-04 18:00:53.541 500861 INFO snapshot.c:107 Exported snapshot "00000008-00000033-1" from the source database - # 2024-11-04 18:00:53.556 500865 INFO cli_clone_follow.c:543 STEP 1: fetch source database tables, indexes, and sequences - # 2024-11-04 18:00:54.570 500865 INFO copydb_schema.c:716 Splitting source candidate tables larger than 10 GB - # 2024-11-04 18:00:54.570 500865 INFO copydb_schema.c:829 Table public.events is 96 GB large which is larger than --split-tables-larger-than 10 GB, and does not have a unique column of type integer: splitting by CTID - # 2024-11-04 18:01:05.538 500865 INFO copydb_schema.c:905 Table public.events is 96 GB large, 10 COPY processes will be used, partitioning on ctid. - # 2024-11-04 18:01:05.564 500865 INFO copydb_schema.c:905 Table public.email_transmissions is 27 GB large, 4 COPY processes will be used, partitioning on id. - # 2024-11-04 18:01:05.584 500865 INFO copydb_schema.c:905 Table public.lessons is 25 GB large, 4 COPY processes will be used, partitioning on id. - # 2024-11-04 18:01:05.605 500865 INFO copydb_schema.c:905 Table public.lesson_users is 16 GB large, 3 COPY processes will be used, partitioning on id. - # 2024-11-04 18:01:05.605 500865 INFO copydb_schema.c:761 Fetched information for 26 tables (including 4 tables split in 21 partitions total), with an estimated total of 907 million tuples and 175 GB on-disk - # 2024-11-04 18:01:05.687 500865 INFO copydb_schema.c:968 Fetched information for 57 indexes (supporting 25 constraints) - # 2024-11-04 18:01:05.753 500865 INFO sequences.c:78 Fetching information for 24 sequences - # 2024-11-04 18:01:05.903 500865 INFO copydb_schema.c:1122 Fetched information for 4 extensions - # 2024-11-04 18:01:06.178 500865 INFO copydb_schema.c:1538 Found 0 indexes (supporting 0 constraints) in the target database - # 2024-11-04 18:01:06.184 500865 INFO cli_clone_follow.c:584 STEP 2: dump the source database schema (pre/post data) - # 2024-11-04 18:01:06.186 500865 INFO pgcmd.c:468 /usr/lib/postgresql/16/bin/pg_dump -Fc --snapshot 00000008-00000033-1 --section=pre-data --section=post-data --file /tmp/pgcopydb/schema/schema.dump 'postgres://neondb_owner@ep-bitter-shape-w2c1ir0a.us-east-2.aws.neon.build/neondb?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' - # 2024-11-04 18:01:06.952 500865 INFO cli_clone_follow.c:592 STEP 3: restore the pre-data section to the target database - # 2024-11-04 18:01:07.004 500865 INFO pgcmd.c:1001 /usr/lib/postgresql/16/bin/pg_restore --dbname 'postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' --section pre-data --jobs 4 --no-owner --no-acl --use-list /tmp/pgcopydb/schema/pre-filtered.list /tmp/pgcopydb/schema/schema.dump - # 2024-11-04 18:01:07.438 500874 INFO table-data.c:656 STEP 4: starting 4 table-data COPY processes - # 2024-11-04 18:01:07.451 500877 INFO vacuum.c:139 STEP 8: skipping VACUUM jobs per --skip-vacuum - # 2024-11-04 18:01:07.457 500875 INFO indexes.c:182 STEP 6: starting 4 CREATE INDEX processes - # 2024-11-04 18:01:07.457 500875 INFO indexes.c:183 STEP 7: constraints are built by the CREATE INDEX processes - # 2024-11-04 18:01:07.507 500865 INFO blobs.c:74 Skipping large objects: none found. - # 2024-11-04 18:01:07.509 500865 INFO sequences.c:194 STEP 9: reset sequences values - # 2024-11-04 18:01:07.510 500886 INFO sequences.c:290 Set sequences values on the target database - # 2024-11-04 20:49:00.587 500865 INFO cli_clone_follow.c:608 STEP 10: restore the post-data section to the target database - # 2024-11-04 20:49:00.600 500865 INFO pgcmd.c:1001 /usr/lib/postgresql/16/bin/pg_restore --dbname 'postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' --section post-data --jobs 4 --no-owner --no-acl --use-list /tmp/pgcopydb/schema/post-filtered.list /tmp/pgcopydb/schema/schema.dump - # 2024-11-05 10:50:58.508 500865 INFO cli_clone_follow.c:639 All step are now done, 16h49m elapsed - # 2024-11-05 10:50:58.508 500865 INFO summary.c:3155 Printing summary for 26 tables and 57 indexes - - # OID | Schema | Name | Parts | copy duration | transmitted bytes | indexes | create index duration - # ------+--------+----------------------+-------+---------------+-------------------+---------+---------------------- - # 24654 | public | events | 10 | 1d11h | 878 GB | 1 | 1h41m - # 24623 | public | email_transmissions | 4 | 4h46m | 99 GB | 3 | 2h04m - # 24665 | public | lessons | 4 | 4h42m | 161 GB | 4 | 1m11s - # 24661 | public | lesson_users | 3 | 2h46m | 49 GB | 3 | 39m35s - # 24631 | public | emails | 1 | 34m07s | 10 GB | 2 | 17s - # 24739 | public | payments | 1 | 5m47s | 1848 MB | 4 | 4m40s - # 24681 | public | module_users | 1 | 4m57s | 1610 MB | 3 | 1m50s - # 24694 | public | orders | 1 | 2m50s | 835 MB | 3 | 1m05s - # 24597 | public | devices | 1 | 1m45s | 498 MB | 2 | 40s - # 24723 | public | payment_methods | 1 | 1m24s | 548 MB | 2 | 31s - # 24765 | public | user_collections | 1 | 2m17s | 1005 MB | 2 | 968ms - # 24774 | public | users | 1 | 52s | 291 MB | 4 | 27s - # 24760 | public | user_accounts | 1 | 16s | 172 MB | 3 | 16s - # 24606 | public | edition_modules | 1 | 8s983 | 46 MB | 3 | 4s749 - # 24583 | public | course_emails | 1 | 8s526 | 26 MB | 2 | 996ms - # 24685 | public | modules | 1 | 1s592 | 21 MB | 3 | 1s696 - # 24610 | public | editions | 1 | 2s199 | 7483 kB | 2 | 1s032 - # 24755 | public | sp_content | 1 | 1s555 | 4177 kB | 0 | 0ms - # 24619 | public | email_broadcasts | 1 | 744ms | 2645 kB | 2 | 677ms - # 24590 | public | courses | 1 | 387ms | 1540 kB | 2 | 367ms - # 24704 | public | payment_gateway_keys | 1 | 1s972 | 164 kB | 2 | 27ms - # 24576 | public | accounts | 1 | 58ms | 24 kB | 1 | 14ms - # 24647 | public | event_names | 1 | 32ms | 397 B | 1 | 8ms - # 24716 | public | payment_gateways | 1 | 1s675 | 117 B | 1 | 11ms - # 24748 | public | roles | 1 | 71ms | 173 B | 1 | 8ms - # 24676 | public | management | 1 | 33ms | 40 B | 1 | 19ms - - - # Step Connection Duration Transfer Concurrency - # -------------------------------------------------- ---------- ---------- ---------- ------------ - # Catalog Queries (table ordering, filtering, etc) source 12s 1 - # Dump Schema source 765ms 1 - # Prepare Schema target 466ms 1 - # COPY, INDEX, CONSTRAINTS, VACUUM (wall clock) both 2h47m 12 - # COPY (cumulative) both 7h46m 1225 GB 4 - # CREATE INDEX (cumulative) target 4h36m 4 - # CONSTRAINTS (cumulative) target 8s493 4 - # VACUUM (cumulative) target 0ms 4 - # Reset Sequences both 60ms 1 - # Large Objects (cumulative) (null) 0ms 0 - # Finalize Schema both 14h01m 4 - # -------------------------------------------------- ---------- ---------- ---------- ------------ - # Total Wall Clock Duration both 16h49m 20 - - - # EOF - - - - name: show tables sizes and retrieve current backpressure seconds + - name: show tables sizes after ingest run: | export LD_LIBRARY_PATH=${PG_16_LIB_PATH} - ${PSQL} "${NEW_PROJECT_CONNSTR}" -c "\dt+" - BACKPRESSURE_TIME_AFTER_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;") - echo "BACKPRESSURE_TIME_AFTER_INGEST=${BACKPRESSURE_TIME_AFTER_INGEST}" >> $GITHUB_ENV - - - name: Parse pgcopydb log and report performance metrics - env: - PERF_TEST_RESULT_CONNSTR: ${{ secrets.PERF_TEST_RESULT_CONNSTR }} - run: | - export LD_LIBRARY_PATH=${PG_16_LIB_PATH} - - # Define the log file path - LOG_FILE="/tmp/pgcopydb_${{ matrix.target_project }}.log" - - # Get the current git commit hash - git config --global --add safe.directory /__w/neon/neon - COMMIT_HASH=$(git rev-parse --short HEAD) - - # Define the platform and test suite - PLATFORM="pg16-${{ matrix.target_project }}-us-east-2-staging" - SUIT="pgcopydb_ingest_bench" - - # Function to convert time (e.g., "2h47m", "4h36m", "118ms", "8s493") to seconds - convert_to_seconds() { - local duration=$1 - local total_seconds=0 - - # Check for hours (h) - if [[ "$duration" =~ ([0-9]+)h ]]; then - total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} * 3600)) - fi - - # Check for seconds (s) - if [[ "$duration" =~ ([0-9]+)s ]]; then - total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0})) - fi - - # Check for milliseconds (ms) (if applicable) - if [[ "$duration" =~ ([0-9]+)ms ]]; then - total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} / 1000)) - duration=${duration/${BASH_REMATCH[0]}/} # need to remove it to avoid double counting with m - fi - - # Check for minutes (m) - must be checked after ms because m is contained in ms - if [[ "$duration" =~ ([0-9]+)m ]]; then - total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} * 60)) - fi - - echo $total_seconds - } - - # Calculate the backpressure difference in seconds - BACKPRESSURE_TIME_DIFF=$(awk "BEGIN {print $BACKPRESSURE_TIME_AFTER_INGEST - $BACKPRESSURE_TIME_BEFORE_INGEST}") - - # Insert the backpressure time difference into the performance database - if [ -n "$BACKPRESSURE_TIME_DIFF" ]; then - PSQL_CMD="${PSQL} \"${PERF_TEST_RESULT_CONNSTR}\" -c \" - INSERT INTO public.perf_test_results (suit, revision, platform, metric_name, metric_value, metric_unit, metric_report_type, recorded_at_timestamp) - VALUES ('${SUIT}', '${COMMIT_HASH}', '${PLATFORM}', 'backpressure_time', ${BACKPRESSURE_TIME_DIFF}, 'seconds', 'lower_is_better', now()); - \"" - echo "Inserting backpressure time difference: ${BACKPRESSURE_TIME_DIFF} seconds" - eval $PSQL_CMD - fi - - # Extract and process log lines - while IFS= read -r line; do - METRIC_NAME="" - # Match each desired line and extract the relevant information - if [[ "$line" =~ COPY,\ INDEX,\ CONSTRAINTS,\ VACUUM.* ]]; then - METRIC_NAME="COPY, INDEX, CONSTRAINTS, VACUUM (wall clock)" - elif [[ "$line" =~ COPY\ \(cumulative\).* ]]; then - METRIC_NAME="COPY (cumulative)" - elif [[ "$line" =~ CREATE\ INDEX\ \(cumulative\).* ]]; then - METRIC_NAME="CREATE INDEX (cumulative)" - elif [[ "$line" =~ CONSTRAINTS\ \(cumulative\).* ]]; then - METRIC_NAME="CONSTRAINTS (cumulative)" - elif [[ "$line" =~ Finalize\ Schema.* ]]; then - METRIC_NAME="Finalize Schema" - elif [[ "$line" =~ Total\ Wall\ Clock\ Duration.* ]]; then - METRIC_NAME="Total Wall Clock Duration" - fi - - # If a metric was matched, insert it into the performance database - if [ -n "$METRIC_NAME" ]; then - DURATION=$(echo "$line" | grep -oP '\d+h\d+m|\d+s|\d+ms|\d{1,2}h\d{1,2}m|\d+\.\d+s' | head -n 1) - METRIC_VALUE=$(convert_to_seconds "$DURATION") - PSQL_CMD="${PSQL} \"${PERF_TEST_RESULT_CONNSTR}\" -c \" - INSERT INTO public.perf_test_results (suit, revision, platform, metric_name, metric_value, metric_unit, metric_report_type, recorded_at_timestamp) - VALUES ('${SUIT}', '${COMMIT_HASH}', '${PLATFORM}', '${METRIC_NAME}', ${METRIC_VALUE}, 'seconds', 'lower_is_better', now()); - \"" - echo "Inserting ${METRIC_NAME} with value ${METRIC_VALUE} seconds" - eval $PSQL_CMD - fi - done < "$LOG_FILE" + ${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "\dt+" - name: Delete Neon Project if: ${{ always() && matrix.target_project == 'new_empty_project' }} diff --git a/scripts/ingest_perf_test_result.py b/scripts/ingest_perf_test_result.py index 40071c01b0..804f8a3cde 100644 --- a/scripts/ingest_perf_test_result.py +++ b/scripts/ingest_perf_test_result.py @@ -25,7 +25,8 @@ CREATE TABLE IF NOT EXISTS perf_test_results ( metric_value NUMERIC, metric_unit VARCHAR(10), metric_report_type TEXT, - recorded_at_timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW() + recorded_at_timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + labels JSONB with default '{}' ) """ @@ -91,6 +92,7 @@ def ingest_perf_test_result(cursor, data_file: Path, recorded_at_timestamp: int) "metric_unit": metric["unit"], "metric_report_type": metric["report"], "recorded_at_timestamp": datetime.utcfromtimestamp(recorded_at_timestamp), + "labels": json.dumps(metric.get("labels")), } args_list.append(values) @@ -105,7 +107,8 @@ def ingest_perf_test_result(cursor, data_file: Path, recorded_at_timestamp: int) metric_value, metric_unit, metric_report_type, - recorded_at_timestamp + recorded_at_timestamp, + labels ) VALUES %s """, args_list, @@ -117,7 +120,8 @@ def ingest_perf_test_result(cursor, data_file: Path, recorded_at_timestamp: int) %(metric_value)s, %(metric_unit)s, %(metric_report_type)s, - %(recorded_at_timestamp)s + %(recorded_at_timestamp)s, + %(labels)s )""", ) return len(args_list) diff --git a/test_runner/fixtures/benchmark_fixture.py b/test_runner/fixtures/benchmark_fixture.py index d3419bd8b1..8e68775471 100644 --- a/test_runner/fixtures/benchmark_fixture.py +++ b/test_runner/fixtures/benchmark_fixture.py @@ -256,12 +256,17 @@ class NeonBenchmarker: metric_value: float, unit: str, report: MetricReport, + labels: Optional[ + dict[str, str] + ] = None, # use this to associate additional key/value pairs in json format for associated Neon object IDs like project ID with the metric ): """ Record a benchmark result. """ # just to namespace the value name = f"{self.PROPERTY_PREFIX}_{metric_name}" + if labels is None: + labels = {} self.property_recorder( name, { @@ -269,6 +274,7 @@ class NeonBenchmarker: "value": metric_value, "unit": unit, "report": report, + "labels": labels, }, ) diff --git a/test_runner/performance/test_perf_ingest_using_pgcopydb.py b/test_runner/performance/test_perf_ingest_using_pgcopydb.py new file mode 100644 index 0000000000..2f4574ba88 --- /dev/null +++ b/test_runner/performance/test_perf_ingest_using_pgcopydb.py @@ -0,0 +1,267 @@ +import os +import re +import subprocess +import sys +import textwrap +from pathlib import Path +from typing import cast +from urllib.parse import urlparse + +import pytest +from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker +from fixtures.utils import humantime_to_ms + + +def setup_environment(): + """Set up necessary environment variables for pgcopydb execution. + + Expects the following variables to be set in the environment: + - PG_CONFIG: e.g. /tmp/neon/pg_install/v16/bin/pg_config + - PSQL: e.g. /tmp/neon/pg_install/v16/bin/psql + - PG_16_LIB_PATH: e.g. /tmp/neon/pg_install/v16/lib + - PGCOPYDB: e.g. /pgcopydb/bin/pgcopydb + - PGCOPYDB_LIB_PATH: e.g. /pgcopydb/lib + - BENCHMARK_INGEST_SOURCE_CONNSTR + - BENCHMARK_INGEST_TARGET_CONNSTR + - PERF_TEST_RESULT_CONNSTR + - TARGET_PROJECT_TYPE + + """ + # Ensure required environment variables are set + required_env_vars = [ + "PGCOPYDB", + "PGCOPYDB_LIB_PATH", + "PG_CONFIG", + "PSQL", + "PG_16_LIB_PATH", + "BENCHMARK_INGEST_SOURCE_CONNSTR", + "BENCHMARK_INGEST_TARGET_CONNSTR", + "PERF_TEST_RESULT_CONNSTR", + "TARGET_PROJECT_TYPE", + ] + for var in required_env_vars: + if not os.getenv(var): + raise OSError(f"Required environment variable '{var}' is not set.") + + +def build_pgcopydb_command(pgcopydb_filter_file: Path, test_output_dir: Path): + """Builds the pgcopydb command to execute using existing environment variables.""" + pgcopydb_executable = os.getenv("PGCOPYDB") + if not pgcopydb_executable: + raise OSError("PGCOPYDB environment variable is not set.") + + return [ + pgcopydb_executable, + "clone", + "--dir", + str(test_output_dir), + "--skip-vacuum", + "--no-owner", + "--no-acl", + "--skip-db-properties", + "--table-jobs", + "4", + "--index-jobs", + "4", + "--restore-jobs", + "4", + "--split-tables-larger-than", + "10GB", + "--skip-extensions", + "--use-copy-binary", + "--filters", + str(pgcopydb_filter_file), + ] + + +@pytest.fixture() # must be function scoped because test_output_dir is function scoped +def pgcopydb_filter_file(test_output_dir: Path) -> Path: + """Creates the pgcopydb_filter.txt file required by pgcopydb.""" + filter_content = textwrap.dedent("""\ + [include-only-table] + public.events + public.emails + public.email_transmissions + public.payments + public.editions + public.edition_modules + public.sp_content + public.email_broadcasts + public.user_collections + public.devices + public.user_accounts + public.lessons + public.lesson_users + public.payment_methods + public.orders + public.course_emails + public.modules + public.users + public.module_users + public.courses + public.payment_gateway_keys + public.accounts + public.roles + public.payment_gateways + public.management + public.event_names + """) + filter_path = test_output_dir / "pgcopydb_filter.txt" + filter_path.write_text(filter_content) + return filter_path + + +def get_backpressure_time(connstr): + """Executes a query to get the backpressure throttling time in seconds.""" + query = "select backpressure_throttling_time()/1000000;" + psql_path = os.getenv("PSQL") + if psql_path is None: + raise OSError("The PSQL environment variable is not set.") + result = subprocess.run( + [psql_path, connstr, "-t", "-c", query], capture_output=True, text=True, check=True + ) + return float(result.stdout.strip()) + + +def run_command_and_log_output(command, log_file_path: Path): + """ + Runs a command and logs output to both a file and GitHub Actions console. + + Args: + command (list): The command to execute. + log_file_path (Path): Path object for the log file where output is written. + """ + # Define a list of necessary environment variables for pgcopydb + custom_env_vars = { + "LD_LIBRARY_PATH": f"{os.getenv('PGCOPYDB_LIB_PATH')}:{os.getenv('PG_16_LIB_PATH')}", + "PGCOPYDB_SOURCE_PGURI": cast(str, os.getenv("BENCHMARK_INGEST_SOURCE_CONNSTR")), + "PGCOPYDB_TARGET_PGURI": cast(str, os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR")), + "PGOPTIONS": "-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7", + } + # Combine the current environment with custom variables + env = os.environ.copy() + env.update(custom_env_vars) + + with log_file_path.open("w") as log_file: + process = subprocess.Popen( + command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, env=env + ) + + assert process.stdout is not None, "process.stdout should not be None" + + # Stream output to both log file and console + for line in process.stdout: + print(line, end="") # Stream to GitHub Actions log + sys.stdout.flush() + log_file.write(line) # Write to log file + + process.wait() # Wait for the process to finish + if process.returncode != 0: + raise subprocess.CalledProcessError(process.returncode, command) + + +def parse_log_and_report_metrics( + zenbenchmark: NeonBenchmarker, log_file_path: Path, backpressure_time_diff: float +): + """Parses the pgcopydb log file for performance metrics and reports them to the database.""" + metrics = {"backpressure_time": backpressure_time_diff} + + # Define regex patterns to capture metrics + metric_patterns = { + "COPY_INDEX_CONSTRAINTS_VACUUM": re.compile( + r"COPY, INDEX, CONSTRAINTS, VACUUM \(wall clock\).*" + ), + "COPY_CUMULATIVE": re.compile(r"COPY \(cumulative\).*"), + "CREATE_INDEX_CUMULATIVE": re.compile(r"CREATE INDEX \(cumulative\).*"), + "CONSTRAINTS_CUMULATIVE": re.compile(r"CONSTRAINTS \(cumulative\).*"), + "FINALIZE_SCHEMA": re.compile(r"Finalize Schema.*"), + "TOTAL_DURATION": re.compile(r"Total Wall Clock Duration.*"), + } + + # Parse log file + with log_file_path.open("r") as log_file: + for line in log_file: + for metric_name, pattern in metric_patterns.items(): + if pattern.search(line): + # Extract duration and convert it to seconds + duration_match = re.search(r"\d+h\d+m|\d+s|\d+ms|\d+\.\d+s", line) + if duration_match: + duration_str = duration_match.group(0) + parts = re.findall(r"\d+[a-zA-Z]+", duration_str) + rust_like_humantime = " ".join(parts) + duration_seconds = humantime_to_ms(rust_like_humantime) / 1000.0 + metrics[metric_name] = duration_seconds + + endpoint_id = {"endpoint_id": get_endpoint_id()} + for metric_name, duration_seconds in metrics.items(): + zenbenchmark.record( + metric_name, duration_seconds, "s", MetricReport.LOWER_IS_BETTER, endpoint_id + ) + + +def get_endpoint_id(): + """Extracts and returns the first segment of the hostname from the PostgreSQL URI stored in BENCHMARK_INGEST_TARGET_CONNSTR.""" + connstr = os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR") + if connstr is None: + raise OSError("BENCHMARK_INGEST_TARGET_CONNSTR environment variable is not set.") + + # Parse the URI + parsed_url = urlparse(connstr) + + # Extract the hostname and split to get the first segment + hostname = parsed_url.hostname + if hostname is None: + raise ValueError("Unable to parse hostname from BENCHMARK_INGEST_TARGET_CONNSTR") + + # Split the hostname by dots and take the first segment + endpoint_id = hostname.split(".")[0] + + return endpoint_id + + +@pytest.fixture() # must be function scoped because test_output_dir is function scoped +def log_file_path(test_output_dir): + """Fixture to provide a temporary log file path.""" + if not os.getenv("TARGET_PROJECT_TYPE"): + raise OSError("Required environment variable 'TARGET_PROJECT_TYPE' is not set.") + return (test_output_dir / os.getenv("TARGET_PROJECT_TYPE")).with_suffix(".log") + + +@pytest.mark.remote_cluster +def test_ingest_performance_using_pgcopydb( + zenbenchmark: NeonBenchmarker, + log_file_path: Path, + pgcopydb_filter_file: Path, + test_output_dir: Path, +): + """ + Simulate project migration from another PostgreSQL provider to Neon. + + Measure performance for Neon ingest steps + - COPY + - CREATE INDEX + - CREATE CONSTRAINT + - VACUUM ANALYZE + - create foreign keys + + Use pgcopydb to copy data from the source database to the destination database. + """ + # Set up environment and create filter file + setup_environment() + + # Get backpressure time before ingest + backpressure_time_before = get_backpressure_time(os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR")) + + # Build and run the pgcopydb command + command = build_pgcopydb_command(pgcopydb_filter_file, test_output_dir) + try: + run_command_and_log_output(command, log_file_path) + except subprocess.CalledProcessError as e: + pytest.fail(f"pgcopydb command failed with error: {e}") + + # Get backpressure time after ingest and calculate the difference + backpressure_time_after = get_backpressure_time(os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR")) + backpressure_time_diff = backpressure_time_after - backpressure_time_before + + # Parse log file and report metrics, including backpressure time difference + parse_log_and_report_metrics(zenbenchmark, log_file_path, backpressure_time_diff) From c9acd214ae37272f3a5eac97aef557b73a692039 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Tue, 19 Nov 2024 11:56:40 +0200 Subject: [PATCH 02/22] Do not create DSM segment for wal_redo_postgres (#9793) ## Problem See https://github.com/neondatabase/neon/issues/9738 ## Summary of changes Do not create DSM segment for wal_redo Postgres --------- Co-authored-by: Konstantin Knizhnik --- pgxn/neon_walredo/walredoproc.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pgxn/neon_walredo/walredoproc.c b/pgxn/neon_walredo/walredoproc.c index 37abb3fa03..619b7255ae 100644 --- a/pgxn/neon_walredo/walredoproc.c +++ b/pgxn/neon_walredo/walredoproc.c @@ -453,7 +453,6 @@ WalRedoMain(int argc, char *argv[]) static void CreateFakeSharedMemoryAndSemaphores(void) { - PGShmemHeader *shim = NULL; PGShmemHeader *hdr; Size size; int numSemas; @@ -486,7 +485,6 @@ CreateFakeSharedMemoryAndSemaphores(void) hdr->totalsize = size; hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader)); - shim = hdr; UsedShmemSegAddr = hdr; UsedShmemSegID = (unsigned long) 42; /* not relevant for non-shared memory */ } @@ -523,8 +521,6 @@ CreateFakeSharedMemoryAndSemaphores(void) */ InitShmemIndex(); - dsm_shmem_init(); - /* * Set up xlog, clog, and buffers */ @@ -599,10 +595,6 @@ CreateFakeSharedMemoryAndSemaphores(void) ShmemBackendArrayAllocation(); #endif - /* Initialize dynamic shared memory facilities. */ - if (!IsUnderPostmaster) - dsm_postmaster_startup(shim); - /* * Now give loadable modules a chance to set up their shmem allocations */ From 37b97b3a682ea8496a98e62d3f2b89906a3eaa64 Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Tue, 19 Nov 2024 13:58:11 +0000 Subject: [PATCH 03/22] chore(local_proxy): reduce some startup logging (#9798) Currently, local_proxy will write an error log if it doesn't find the config file. This is expected for startup, so it's just noise. It is an error if we do receive an explicit SIGHUP though. I've also demoted the build info logs to be debug level. We don't need them in the compute image since we have other ways to determine what code is running. Lastly, I've demoted SIGHUP signal handling from warn to info, since it's not really a warning event. See https://github.com/neondatabase/cloud/issues/10880 for more details --- proxy/src/bin/local_proxy.rs | 37 ++++++++++++++++++++++++++++++------ proxy/src/jemalloc.rs | 2 +- proxy/src/signals.rs | 4 ++-- 3 files changed, 34 insertions(+), 9 deletions(-) diff --git a/proxy/src/bin/local_proxy.rs b/proxy/src/bin/local_proxy.rs index fbdb1dec15..41b0e11e85 100644 --- a/proxy/src/bin/local_proxy.rs +++ b/proxy/src/bin/local_proxy.rs @@ -32,11 +32,12 @@ project_git_version!(GIT_VERSION); project_build_tag!(BUILD_TAG); use clap::Parser; +use thiserror::Error; use tokio::net::TcpListener; use tokio::sync::Notify; use tokio::task::JoinSet; use tokio_util::sync::CancellationToken; -use tracing::{error, info, warn}; +use tracing::{debug, error, info, warn}; use utils::sentry_init::init_sentry; use utils::{pid_file, project_build_tag, project_git_version}; @@ -124,8 +125,8 @@ async fn main() -> anyhow::Result<()> { Metrics::install(Arc::new(ThreadPoolMetrics::new(0))); - info!("Version: {GIT_VERSION}"); - info!("Build_tag: {BUILD_TAG}"); + debug!("Version: {GIT_VERSION}"); + debug!("Build_tag: {BUILD_TAG}"); let neon_metrics = ::metrics::NeonMetrics::new(::metrics::BuildInfo { revision: GIT_VERSION, build_tag: BUILD_TAG, @@ -305,26 +306,46 @@ fn build_auth_backend( Ok(Box::leak(Box::new(auth_backend))) } +#[derive(Error, Debug)] +enum RefreshConfigError { + #[error(transparent)] + Read(#[from] std::io::Error), + #[error(transparent)] + Parse(#[from] serde_json::Error), + #[error(transparent)] + Validate(anyhow::Error), +} + async fn refresh_config_loop(path: Utf8PathBuf, rx: Arc) { + let mut init = true; loop { rx.notified().await; match refresh_config_inner(&path).await { Ok(()) => {} + // don't log for file not found errors if this is the first time we are checking + // for computes that don't use local_proxy, this is not an error. + Err(RefreshConfigError::Read(e)) + if init && e.kind() == std::io::ErrorKind::NotFound => + { + debug!(error=?e, ?path, "could not read config file"); + } Err(e) => { error!(error=?e, ?path, "could not read config file"); } } + + init = false; } } -async fn refresh_config_inner(path: &Utf8Path) -> anyhow::Result<()> { +async fn refresh_config_inner(path: &Utf8Path) -> Result<(), RefreshConfigError> { let bytes = tokio::fs::read(&path).await?; let data: LocalProxySpec = serde_json::from_slice(&bytes)?; let mut jwks_set = vec![]; - for jwks in data.jwks.into_iter().flatten() { + fn parse_jwks_settings(jwks: compute_api::spec::JwksSettings) -> anyhow::Result { let mut jwks_url = url::Url::from_str(&jwks.jwks_url).context("parsing JWKS url")?; ensure!( @@ -367,7 +388,7 @@ async fn refresh_config_inner(path: &Utf8Path) -> anyhow::Result<()> { } } - jwks_set.push(JwksSettings { + Ok(JwksSettings { id: jwks.id, jwks_url, provider_name: jwks.provider_name, @@ -381,6 +402,10 @@ async fn refresh_config_inner(path: &Utf8Path) -> anyhow::Result<()> { }) } + for jwks in data.jwks.into_iter().flatten() { + jwks_set.push(parse_jwks_settings(jwks).map_err(RefreshConfigError::Validate)?); + } + info!("successfully loaded new config"); JWKS_ROLE_MAP.store(Some(Arc::new(EndpointJwksResponse { jwks: jwks_set }))); diff --git a/proxy/src/jemalloc.rs b/proxy/src/jemalloc.rs index 0fae78b60c..9888458ee2 100644 --- a/proxy/src/jemalloc.rs +++ b/proxy/src/jemalloc.rs @@ -38,7 +38,7 @@ where impl MetricRecorder { pub fn new() -> Result { - tracing::info!( + tracing::debug!( config = config::malloc_conf::read()?, version = version::read()?, "starting jemalloc recorder" diff --git a/proxy/src/signals.rs b/proxy/src/signals.rs index 514a83d5eb..0b675683c0 100644 --- a/proxy/src/signals.rs +++ b/proxy/src/signals.rs @@ -2,7 +2,7 @@ use std::convert::Infallible; use anyhow::bail; use tokio_util::sync::CancellationToken; -use tracing::warn; +use tracing::{info, warn}; /// Handle unix signals appropriately. pub async fn handle( @@ -22,7 +22,7 @@ where tokio::select! { // Hangup is commonly used for config reload. _ = hangup.recv() => { - warn!("received SIGHUP"); + info!("received SIGHUP"); refresh_config(); } // Shut down the whole application. From 191f745c814e475b6f55e243ac5606cf2089dd64 Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Tue, 19 Nov 2024 13:58:26 +0000 Subject: [PATCH 04/22] fix(proxy/auth_broker): ignore -pooler suffix (#9800) Fixes https://github.com/neondatabase/cloud/issues/20400 We cannot mix local_proxy and pgbouncer, so we are filtering out the `-pooler` suffix prior to calling wake_compute. --- proxy/src/serverless/backend.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/proxy/src/serverless/backend.rs b/proxy/src/serverless/backend.rs index 7fc5bd236d..5e9fd151ae 100644 --- a/proxy/src/serverless/backend.rs +++ b/proxy/src/serverless/backend.rs @@ -33,7 +33,7 @@ use crate::intern::EndpointIdInt; use crate::proxy::connect_compute::ConnectMechanism; use crate::proxy::retry::{CouldRetry, ShouldRetryWakeCompute}; use crate::rate_limiter::EndpointRateLimiter; -use crate::types::{EndpointId, Host}; +use crate::types::{EndpointId, Host, LOCAL_PROXY_SUFFIX}; pub(crate) struct PoolingBackend { pub(crate) http_conn_pool: Arc>, @@ -215,7 +215,10 @@ impl PoolingBackend { let backend = self.auth_backend.as_ref().map(|()| ComputeCredentials { info: ComputeUserInfo { user: conn_info.user_info.user.clone(), - endpoint: EndpointId::from(format!("{}-local-proxy", conn_info.user_info.endpoint)), + endpoint: EndpointId::from(format!( + "{}{LOCAL_PROXY_SUFFIX}", + conn_info.user_info.endpoint.normalize() + )), options: conn_info.user_info.options.clone(), }, keys: crate::auth::backend::ComputeCredentialKeys::None, From ada84400b7fd7f0e5e1c0f5f9758c9261b0674e9 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Tue, 19 Nov 2024 17:01:05 +0200 Subject: [PATCH 05/22] PostgreSQL minor version updates (17.2, 16.6, 15.10, 14.15) (#9795) The community decided to make a new off-schedule release due to ABI breakage in last week's release. We're not affected by the ABI breakage because we rebuild all extensions in our docker images, but let's stay up-to-date. There were a few other fixes in the release too. --- vendor/postgres-v14 | 2 +- vendor/postgres-v15 | 2 +- vendor/postgres-v16 | 2 +- vendor/postgres-v17 | 2 +- vendor/revisions.json | 16 ++++++++-------- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/vendor/postgres-v14 b/vendor/postgres-v14 index c5e0d642ef..e54af35045 160000 --- a/vendor/postgres-v14 +++ b/vendor/postgres-v14 @@ -1 +1 @@ -Subproject commit c5e0d642efb02e4bfedc283b0a7707fe6c79cc89 +Subproject commit e54af3504513b1f44c0e0f68791a0d6d4210e948 diff --git a/vendor/postgres-v15 b/vendor/postgres-v15 index 1feff6b60f..29bf1f04a5 160000 --- a/vendor/postgres-v15 +++ b/vendor/postgres-v15 @@ -1 +1 @@ -Subproject commit 1feff6b60f07cb71b665d0f5ead71a4320a71743 +Subproject commit 29bf1f04a5628618b4c7972fed6f87065e3750ce diff --git a/vendor/postgres-v16 b/vendor/postgres-v16 index b0b693ea29..b7e9ac3eb9 160000 --- a/vendor/postgres-v16 +++ b/vendor/postgres-v16 @@ -1 +1 @@ -Subproject commit b0b693ea298454e95e6b154780d1fd586a244dfd +Subproject commit b7e9ac3eb9c5f43c443ebc76ddf06d5038c9bb34 diff --git a/vendor/postgres-v17 b/vendor/postgres-v17 index aa2e29f2b6..a05dc1378d 160000 --- a/vendor/postgres-v17 +++ b/vendor/postgres-v17 @@ -1 +1 @@ -Subproject commit aa2e29f2b6952140dfe51876bbd11054acae776f +Subproject commit a05dc1378dd822276dc99cb5e888f905d3527597 diff --git a/vendor/revisions.json b/vendor/revisions.json index a1f2bc5dd1..7243ba8716 100644 --- a/vendor/revisions.json +++ b/vendor/revisions.json @@ -1,18 +1,18 @@ { "v17": [ - "17.1", - "aa2e29f2b6952140dfe51876bbd11054acae776f" + "17.2", + "a05dc1378dd822276dc99cb5e888f905d3527597" ], "v16": [ - "16.5", - "b0b693ea298454e95e6b154780d1fd586a244dfd" + "16.6", + "b7e9ac3eb9c5f43c443ebc76ddf06d5038c9bb34" ], "v15": [ - "15.9", - "1feff6b60f07cb71b665d0f5ead71a4320a71743" + "15.10", + "29bf1f04a5628618b4c7972fed6f87065e3750ce" ], "v14": [ - "14.14", - "c5e0d642efb02e4bfedc283b0a7707fe6c79cc89" + "14.15", + "e54af3504513b1f44c0e0f68791a0d6d4210e948" ] } From a8ac895b83cd7339398d153b8ce73db959c21686 Mon Sep 17 00:00:00 2001 From: Peter Bendel Date: Tue, 19 Nov 2024 18:22:51 +0100 Subject: [PATCH 06/22] re-acquire S3 OIDC token after long running tests for report upload to S3 (#9799) ## Problem If a benchmark or test-case runs longer than the AWS OIDC token lifetime successive upload of test reports to S3 fail - example: https://github.com/neondatabase/neon/actions/runs/11905529176/job/33176168174#step:9:243 ## Summary of changes In actions that require access to S3 and which are invoked after a long running python testcase we re-acquire the OIDC token explicitly. Note that we need to pass down the aws_oicd_role_arn from the workflow to the action because actions have no access to GitHub vars for security reasons. Sample run https://github.com/neondatabase/neon/actions/runs/11912328276/job/33195676867 --- .../actions/allure-report-generate/action.yml | 12 ++++++++++ .../actions/allure-report-store/action.yml | 12 ++++++++++ .github/workflows/benchmarking.yml | 24 +++++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/.github/actions/allure-report-generate/action.yml b/.github/actions/allure-report-generate/action.yml index 16b6e71498..d1d09223db 100644 --- a/.github/actions/allure-report-generate/action.yml +++ b/.github/actions/allure-report-generate/action.yml @@ -7,6 +7,10 @@ inputs: type: boolean required: false default: false + aws_oicd_role_arn: + description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role' + required: false + default: '' outputs: base-url: @@ -79,6 +83,14 @@ runs: ALLURE_VERSION: 2.27.0 ALLURE_ZIP_SHA256: b071858fb2fa542c65d8f152c5c40d26267b2dfb74df1f1608a589ecca38e777 + - name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test + if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }} + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ inputs.aws_oicd_role_arn }} + role-duration-seconds: 3600 # 1 hour should be more than enough to upload report + # Potentially we could have several running build for the same key (for example, for the main branch), so we use improvised lock for this - name: Acquire lock shell: bash -euxo pipefail {0} diff --git a/.github/actions/allure-report-store/action.yml b/.github/actions/allure-report-store/action.yml index df4a6712ac..9c376f420a 100644 --- a/.github/actions/allure-report-store/action.yml +++ b/.github/actions/allure-report-store/action.yml @@ -8,6 +8,10 @@ inputs: unique-key: description: 'string to distinguish different results in the same run' required: true + aws_oicd_role_arn: + description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role' + required: false + default: '' runs: using: "composite" @@ -31,6 +35,14 @@ runs: env: REPORT_DIR: ${{ inputs.report-dir }} + - name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test + if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }} + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ inputs.aws_oicd_role_arn }} + role-duration-seconds: 3600 # 1 hour should be more than enough to upload report + - name: Upload test results shell: bash -euxo pipefail {0} run: | diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 0e3c31ec57..0289f552f9 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -122,6 +122,7 @@ jobs: run_in_parallel: false save_perf_report: ${{ env.SAVE_PERF_REPORT }} pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} # Set --sparse-ordering option of pytest-order plugin # to ensure tests are running in order of appears in the file. # It's important for test_perf_pgbench.py::test_pgbench_remote_* tests @@ -150,6 +151,8 @@ jobs: id: create-allure-report if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate + with: + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} @@ -211,6 +214,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 5400 pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -227,6 +231,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 5400 pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -238,6 +243,7 @@ jobs: uses: ./.github/actions/allure-report-generate with: store-test-results-into-db: true + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }} @@ -446,6 +452,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" @@ -460,6 +467,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" @@ -474,6 +482,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" @@ -490,6 +499,8 @@ jobs: id: create-allure-report if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate + with: + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} @@ -600,6 +611,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -614,6 +626,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" @@ -623,6 +636,8 @@ jobs: id: create-allure-report if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate + with: + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} @@ -724,6 +739,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 43200 -k test_clickbench pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -736,6 +752,8 @@ jobs: id: create-allure-report if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate + with: + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} @@ -838,6 +856,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_tpch pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -848,6 +867,8 @@ jobs: id: create-allure-report if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate + with: + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} @@ -936,6 +957,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_user_examples pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -945,6 +967,8 @@ jobs: id: create-allure-report if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate + with: + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} From 15468cd23c8398ad37cc568e2140fd5413c4653d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 19 Nov 2024 19:08:00 +0000 Subject: [PATCH 07/22] build(deps): bump aiohttp from 3.10.2 to 3.10.11 (#9794) --- poetry.lock | 459 +++++++++++++++++++++++++++++++------------------ pyproject.toml | 2 +- 2 files changed, 288 insertions(+), 173 deletions(-) diff --git a/poetry.lock b/poetry.lock index d869761e8e..6171f92391 100644 --- a/poetry.lock +++ b/poetry.lock @@ -13,97 +13,112 @@ files = [ [[package]] name = "aiohttp" -version = "3.10.2" +version = "3.10.11" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.8" files = [ - {file = "aiohttp-3.10.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:95213b3d79c7e387144e9cb7b9d2809092d6ff2c044cb59033aedc612f38fb6d"}, - {file = "aiohttp-3.10.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1aa005f060aff7124cfadaa2493f00a4e28ed41b232add5869e129a2e395935a"}, - {file = "aiohttp-3.10.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eabe6bf4c199687592f5de4ccd383945f485779c7ffb62a9b9f1f8a3f9756df8"}, - {file = "aiohttp-3.10.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96e010736fc16d21125c7e2dc5c350cd43c528b85085c04bf73a77be328fe944"}, - {file = "aiohttp-3.10.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99f81f9c1529fd8e03be4a7bd7df32d14b4f856e90ef6e9cbad3415dbfa9166c"}, - {file = "aiohttp-3.10.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d611d1a01c25277bcdea06879afbc11472e33ce842322496b211319aa95441bb"}, - {file = "aiohttp-3.10.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e00191d38156e09e8c81ef3d75c0d70d4f209b8381e71622165f22ef7da6f101"}, - {file = "aiohttp-3.10.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:74c091a5ded6cb81785de2d7a8ab703731f26de910dbe0f3934eabef4ae417cc"}, - {file = "aiohttp-3.10.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:18186a80ec5a701816adbf1d779926e1069392cf18504528d6e52e14b5920525"}, - {file = "aiohttp-3.10.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5a7ceb2a0d2280f23a02c64cd0afdc922079bb950400c3dd13a1ab2988428aac"}, - {file = "aiohttp-3.10.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8bd7be6ff6c162a60cb8fce65ee879a684fbb63d5466aba3fa5b9288eb04aefa"}, - {file = "aiohttp-3.10.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:fae962b62944eaebff4f4fddcf1a69de919e7b967136a318533d82d93c3c6bd1"}, - {file = "aiohttp-3.10.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a0fde16d284efcacbe15fb0c1013f0967b6c3e379649239d783868230bf1db42"}, - {file = "aiohttp-3.10.2-cp310-cp310-win32.whl", hash = "sha256:f81cd85a0e76ec7b8e2b6636fe02952d35befda4196b8c88f3cec5b4fb512839"}, - {file = "aiohttp-3.10.2-cp310-cp310-win_amd64.whl", hash = "sha256:54ba10eb5a3481c28282eb6afb5f709aedf53cf9c3a31875ffbdc9fc719ffd67"}, - {file = "aiohttp-3.10.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:87fab7f948e407444c2f57088286e00e2ed0003ceaf3d8f8cc0f60544ba61d91"}, - {file = "aiohttp-3.10.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ec6ad66ed660d46503243cbec7b2b3d8ddfa020f984209b3b8ef7d98ce69c3f2"}, - {file = "aiohttp-3.10.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a4be88807283bd96ae7b8e401abde4ca0bab597ba73b5e9a2d98f36d451e9aac"}, - {file = "aiohttp-3.10.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01c98041f90927c2cbd72c22a164bb816fa3010a047d264969cf82e1d4bcf8d1"}, - {file = "aiohttp-3.10.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54e36c67e1a9273ecafab18d6693da0fb5ac48fd48417e4548ac24a918c20998"}, - {file = "aiohttp-3.10.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7de3ddb6f424af54535424082a1b5d1ae8caf8256ebd445be68c31c662354720"}, - {file = "aiohttp-3.10.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7dd9c7db94b4692b827ce51dcee597d61a0e4f4661162424faf65106775b40e7"}, - {file = "aiohttp-3.10.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e57e21e1167705f8482ca29cc5d02702208d8bf4aff58f766d94bcd6ead838cd"}, - {file = "aiohttp-3.10.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a1a50e59b720060c29e2951fd9f13c01e1ea9492e5a527b92cfe04dd64453c16"}, - {file = "aiohttp-3.10.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:686c87782481fda5ee6ba572d912a5c26d9f98cc5c243ebd03f95222af3f1b0f"}, - {file = "aiohttp-3.10.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:dafb4abb257c0ed56dc36f4e928a7341b34b1379bd87e5a15ce5d883c2c90574"}, - {file = "aiohttp-3.10.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:494a6f77560e02bd7d1ab579fdf8192390567fc96a603f21370f6e63690b7f3d"}, - {file = "aiohttp-3.10.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6fe8503b1b917508cc68bf44dae28823ac05e9f091021e0c41f806ebbb23f92f"}, - {file = "aiohttp-3.10.2-cp311-cp311-win32.whl", hash = "sha256:4ddb43d06ce786221c0dfd3c91b4892c318eaa36b903f7c4278e7e2fa0dd5102"}, - {file = "aiohttp-3.10.2-cp311-cp311-win_amd64.whl", hash = "sha256:ca2f5abcb0a9a47e56bac173c01e9f6c6e7f27534d91451c5f22e6a35a5a2093"}, - {file = "aiohttp-3.10.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:14eb6b17f6246959fb0b035d4f4ae52caa870c4edfb6170aad14c0de5bfbf478"}, - {file = "aiohttp-3.10.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:465e445ec348d4e4bd349edd8b22db75f025da9d7b6dc1369c48e7935b85581e"}, - {file = "aiohttp-3.10.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:341f8ece0276a828d95b70cd265d20e257f5132b46bf77d759d7f4e0443f2906"}, - {file = "aiohttp-3.10.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c01fbb87b5426381cd9418b3ddcf4fc107e296fa2d3446c18ce6c76642f340a3"}, - {file = "aiohttp-3.10.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2c474af073e1a6763e1c5522bbb2d85ff8318197e4c6c919b8d7886e16213345"}, - {file = "aiohttp-3.10.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d9076810a5621236e29b2204e67a68e1fe317c8727ee4c9abbfbb1083b442c38"}, - {file = "aiohttp-3.10.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8f515d6859e673940e08de3922b9c4a2249653b0ac181169313bd6e4b1978ac"}, - {file = "aiohttp-3.10.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:655e583afc639bef06f3b2446972c1726007a21003cd0ef57116a123e44601bc"}, - {file = "aiohttp-3.10.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8da9449a575133828cc99985536552ea2dcd690e848f9d41b48d8853a149a959"}, - {file = "aiohttp-3.10.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:19073d57d0feb1865d12361e2a1f5a49cb764bf81a4024a3b608ab521568093a"}, - {file = "aiohttp-3.10.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c8e98e1845805f184d91fda6f9ab93d7c7b0dddf1c07e0255924bfdb151a8d05"}, - {file = "aiohttp-3.10.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:377220a5efde6f9497c5b74649b8c261d3cce8a84cb661be2ed8099a2196400a"}, - {file = "aiohttp-3.10.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:92f7f4a4dc9cdb5980973a74d43cdbb16286dacf8d1896b6c3023b8ba8436f8e"}, - {file = "aiohttp-3.10.2-cp312-cp312-win32.whl", hash = "sha256:9bb2834a6f11d65374ce97d366d6311a9155ef92c4f0cee543b2155d06dc921f"}, - {file = "aiohttp-3.10.2-cp312-cp312-win_amd64.whl", hash = "sha256:518dc3cb37365255708283d1c1c54485bbacccd84f0a0fb87ed8917ba45eda5b"}, - {file = "aiohttp-3.10.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:7f98e70bbbf693086efe4b86d381efad8edac040b8ad02821453083d15ec315f"}, - {file = "aiohttp-3.10.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9f6f0b252a009e98fe84028a4ec48396a948e7a65b8be06ccfc6ef68cf1f614d"}, - {file = "aiohttp-3.10.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9360e3ffc7b23565600e729e8c639c3c50d5520e05fdf94aa2bd859eef12c407"}, - {file = "aiohttp-3.10.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3988044d1635c7821dd44f0edfbe47e9875427464e59d548aece447f8c22800a"}, - {file = "aiohttp-3.10.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:30a9d59da1543a6f1478c3436fd49ec59be3868bca561a33778b4391005e499d"}, - {file = "aiohttp-3.10.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9f49bdb94809ac56e09a310a62f33e5f22973d6fd351aac72a39cd551e98194"}, - {file = "aiohttp-3.10.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddfd2dca3f11c365d6857a07e7d12985afc59798458a2fdb2ffa4a0332a3fd43"}, - {file = "aiohttp-3.10.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:685c1508ec97b2cd3e120bfe309a4ff8e852e8a7460f1ef1de00c2c0ed01e33c"}, - {file = "aiohttp-3.10.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:49904f38667c44c041a0b44c474b3ae36948d16a0398a8f8cd84e2bb3c42a069"}, - {file = "aiohttp-3.10.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:352f3a4e5f11f3241a49b6a48bc5b935fabc35d1165fa0d87f3ca99c1fcca98b"}, - {file = "aiohttp-3.10.2-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:fc61f39b534c5d5903490478a0dd349df397d2284a939aa3cbaa2fb7a19b8397"}, - {file = "aiohttp-3.10.2-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:ad2274e707be37420d0b6c3d26a8115295fe9d8e6e530fa6a42487a8ca3ad052"}, - {file = "aiohttp-3.10.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c836bf3c7512100219fe1123743fd8dd9a2b50dd7cfb0c3bb10d041309acab4b"}, - {file = "aiohttp-3.10.2-cp38-cp38-win32.whl", hash = "sha256:53e8898adda402be03ff164b0878abe2d884e3ea03a4701e6ad55399d84b92dc"}, - {file = "aiohttp-3.10.2-cp38-cp38-win_amd64.whl", hash = "sha256:7cc8f65f5b22304693de05a245b6736b14cb5bc9c8a03da6e2ae9ef15f8b458f"}, - {file = "aiohttp-3.10.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9dfc906d656e14004c5bc672399c1cccc10db38df2b62a13fb2b6e165a81c316"}, - {file = "aiohttp-3.10.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:91b10208b222ddf655c3a3d5b727879d7163db12b634492df41a9182a76edaae"}, - {file = "aiohttp-3.10.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9fd16b5e1a7bdd14668cd6bde60a2a29b49147a535c74f50d8177d11b38433a7"}, - {file = "aiohttp-3.10.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2bfdda4971bd79201f59adbad24ec2728875237e1c83bba5221284dbbf57bda"}, - {file = "aiohttp-3.10.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:69d73f869cf29e8a373127fc378014e2b17bcfbe8d89134bc6fb06a2f67f3cb3"}, - {file = "aiohttp-3.10.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df59f8486507c421c0620a2c3dce81fbf1d54018dc20ff4fecdb2c106d6e6abc"}, - {file = "aiohttp-3.10.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0df930015db36b460aa9badbf35eccbc383f00d52d4b6f3de2ccb57d064a6ade"}, - {file = "aiohttp-3.10.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:562b1153ab7f766ee6b8b357ec777a302770ad017cf18505d34f1c088fccc448"}, - {file = "aiohttp-3.10.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:d984db6d855de58e0fde1ef908d48fe9a634cadb3cf715962722b4da1c40619d"}, - {file = "aiohttp-3.10.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:14dc3fcb0d877911d775d511eb617a486a8c48afca0a887276e63db04d3ee920"}, - {file = "aiohttp-3.10.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:b52a27a5c97275e254704e1049f4b96a81e67d6205f52fa37a4777d55b0e98ef"}, - {file = "aiohttp-3.10.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:cd33d9de8cfd006a0d0fe85f49b4183c57e91d18ffb7e9004ce855e81928f704"}, - {file = "aiohttp-3.10.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1238fc979160bc03a92fff9ad021375ff1c8799c6aacb0d8ea1b357ea40932bb"}, - {file = "aiohttp-3.10.2-cp39-cp39-win32.whl", hash = "sha256:e2f43d238eae4f0b04f58d4c0df4615697d4ca3e9f9b1963d49555a94f0f5a04"}, - {file = "aiohttp-3.10.2-cp39-cp39-win_amd64.whl", hash = "sha256:947847f07a8f81d7b39b2d0202fd73e61962ebe17ac2d8566f260679e467da7b"}, - {file = "aiohttp-3.10.2.tar.gz", hash = "sha256:4d1f694b5d6e459352e5e925a42e05bac66655bfde44d81c59992463d2897014"}, + {file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5077b1a5f40ffa3ba1f40d537d3bec4383988ee51fbba6b74aa8fb1bc466599e"}, + {file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8d6a14a4d93b5b3c2891fca94fa9d41b2322a68194422bef0dd5ec1e57d7d298"}, + {file = "aiohttp-3.10.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ffbfde2443696345e23a3c597049b1dd43049bb65337837574205e7368472177"}, + {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20b3d9e416774d41813bc02fdc0663379c01817b0874b932b81c7f777f67b217"}, + {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b943011b45ee6bf74b22245c6faab736363678e910504dd7531a58c76c9015a"}, + {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48bc1d924490f0d0b3658fe5c4b081a4d56ebb58af80a6729d4bd13ea569797a"}, + {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e12eb3f4b1f72aaaf6acd27d045753b18101524f72ae071ae1c91c1cd44ef115"}, + {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f14ebc419a568c2eff3c1ed35f634435c24ead2fe19c07426af41e7adb68713a"}, + {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:72b191cdf35a518bfc7ca87d770d30941decc5aaf897ec8b484eb5cc8c7706f3"}, + {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5ab2328a61fdc86424ee540d0aeb8b73bbcad7351fb7cf7a6546fc0bcffa0038"}, + {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:aa93063d4af05c49276cf14e419550a3f45258b6b9d1f16403e777f1addf4519"}, + {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:30283f9d0ce420363c24c5c2421e71a738a2155f10adbb1a11a4d4d6d2715cfc"}, + {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e5358addc8044ee49143c546d2182c15b4ac3a60be01c3209374ace05af5733d"}, + {file = "aiohttp-3.10.11-cp310-cp310-win32.whl", hash = "sha256:e1ffa713d3ea7cdcd4aea9cddccab41edf6882fa9552940344c44e59652e1120"}, + {file = "aiohttp-3.10.11-cp310-cp310-win_amd64.whl", hash = "sha256:778cbd01f18ff78b5dd23c77eb82987ee4ba23408cbed233009fd570dda7e674"}, + {file = "aiohttp-3.10.11-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:80ff08556c7f59a7972b1e8919f62e9c069c33566a6d28586771711e0eea4f07"}, + {file = "aiohttp-3.10.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c8f96e9ee19f04c4914e4e7a42a60861066d3e1abf05c726f38d9d0a466e695"}, + {file = "aiohttp-3.10.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fb8601394d537da9221947b5d6e62b064c9a43e88a1ecd7414d21a1a6fba9c24"}, + {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ea224cf7bc2d8856d6971cea73b1d50c9c51d36971faf1abc169a0d5f85a382"}, + {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db9503f79e12d5d80b3efd4d01312853565c05367493379df76d2674af881caa"}, + {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0f449a50cc33f0384f633894d8d3cd020e3ccef81879c6e6245c3c375c448625"}, + {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82052be3e6d9e0c123499127782a01a2b224b8af8c62ab46b3f6197035ad94e9"}, + {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:20063c7acf1eec550c8eb098deb5ed9e1bb0521613b03bb93644b810986027ac"}, + {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:489cced07a4c11488f47aab1f00d0c572506883f877af100a38f1fedaa884c3a"}, + {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ea9b3bab329aeaa603ed3bf605f1e2a6f36496ad7e0e1aa42025f368ee2dc07b"}, + {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:ca117819d8ad113413016cb29774b3f6d99ad23c220069789fc050267b786c16"}, + {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:2dfb612dcbe70fb7cdcf3499e8d483079b89749c857a8f6e80263b021745c730"}, + {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f9b615d3da0d60e7d53c62e22b4fd1c70f4ae5993a44687b011ea3a2e49051b8"}, + {file = "aiohttp-3.10.11-cp311-cp311-win32.whl", hash = "sha256:29103f9099b6068bbdf44d6a3d090e0a0b2be6d3c9f16a070dd9d0d910ec08f9"}, + {file = "aiohttp-3.10.11-cp311-cp311-win_amd64.whl", hash = "sha256:236b28ceb79532da85d59aa9b9bf873b364e27a0acb2ceaba475dc61cffb6f3f"}, + {file = "aiohttp-3.10.11-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:7480519f70e32bfb101d71fb9a1f330fbd291655a4c1c922232a48c458c52710"}, + {file = "aiohttp-3.10.11-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f65267266c9aeb2287a6622ee2bb39490292552f9fbf851baabc04c9f84e048d"}, + {file = "aiohttp-3.10.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7400a93d629a0608dc1d6c55f1e3d6e07f7375745aaa8bd7f085571e4d1cee97"}, + {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f34b97e4b11b8d4eb2c3a4f975be626cc8af99ff479da7de49ac2c6d02d35725"}, + {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e7b825da878464a252ccff2958838f9caa82f32a8dbc334eb9b34a026e2c636"}, + {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9f92a344c50b9667827da308473005f34767b6a2a60d9acff56ae94f895f385"}, + {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc6f1ab987a27b83c5268a17218463c2ec08dbb754195113867a27b166cd6087"}, + {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1dc0f4ca54842173d03322793ebcf2c8cc2d34ae91cc762478e295d8e361e03f"}, + {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7ce6a51469bfaacff146e59e7fb61c9c23006495d11cc24c514a455032bcfa03"}, + {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:aad3cd91d484d065ede16f3cf15408254e2469e3f613b241a1db552c5eb7ab7d"}, + {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f4df4b8ca97f658c880fb4b90b1d1ec528315d4030af1ec763247ebfd33d8b9a"}, + {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:2e4e18a0a2d03531edbc06c366954e40a3f8d2a88d2b936bbe78a0c75a3aab3e"}, + {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6ce66780fa1a20e45bc753cda2a149daa6dbf1561fc1289fa0c308391c7bc0a4"}, + {file = "aiohttp-3.10.11-cp312-cp312-win32.whl", hash = "sha256:a919c8957695ea4c0e7a3e8d16494e3477b86f33067478f43106921c2fef15bb"}, + {file = "aiohttp-3.10.11-cp312-cp312-win_amd64.whl", hash = "sha256:b5e29706e6389a2283a91611c91bf24f218962717c8f3b4e528ef529d112ee27"}, + {file = "aiohttp-3.10.11-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:703938e22434d7d14ec22f9f310559331f455018389222eed132808cd8f44127"}, + {file = "aiohttp-3.10.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9bc50b63648840854e00084c2b43035a62e033cb9b06d8c22b409d56eb098413"}, + {file = "aiohttp-3.10.11-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5f0463bf8b0754bc744e1feb61590706823795041e63edf30118a6f0bf577461"}, + {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6c6dec398ac5a87cb3a407b068e1106b20ef001c344e34154616183fe684288"}, + {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bcaf2d79104d53d4dcf934f7ce76d3d155302d07dae24dff6c9fffd217568067"}, + {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:25fd5470922091b5a9aeeb7e75be609e16b4fba81cdeaf12981393fb240dd10e"}, + {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbde2ca67230923a42161b1f408c3992ae6e0be782dca0c44cb3206bf330dee1"}, + {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:249c8ff8d26a8b41a0f12f9df804e7c685ca35a207e2410adbd3e924217b9006"}, + {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:878ca6a931ee8c486a8f7b432b65431d095c522cbeb34892bee5be97b3481d0f"}, + {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:8663f7777ce775f0413324be0d96d9730959b2ca73d9b7e2c2c90539139cbdd6"}, + {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:6cd3f10b01f0c31481fba8d302b61603a2acb37b9d30e1d14e0f5a58b7b18a31"}, + {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:4e8d8aad9402d3aa02fdc5ca2fe68bcb9fdfe1f77b40b10410a94c7f408b664d"}, + {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:38e3c4f80196b4f6c3a85d134a534a56f52da9cb8d8e7af1b79a32eefee73a00"}, + {file = "aiohttp-3.10.11-cp313-cp313-win32.whl", hash = "sha256:fc31820cfc3b2863c6e95e14fcf815dc7afe52480b4dc03393c4873bb5599f71"}, + {file = "aiohttp-3.10.11-cp313-cp313-win_amd64.whl", hash = "sha256:4996ff1345704ffdd6d75fb06ed175938c133425af616142e7187f28dc75f14e"}, + {file = "aiohttp-3.10.11-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:74baf1a7d948b3d640badeac333af581a367ab916b37e44cf90a0334157cdfd2"}, + {file = "aiohttp-3.10.11-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:473aebc3b871646e1940c05268d451f2543a1d209f47035b594b9d4e91ce8339"}, + {file = "aiohttp-3.10.11-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c2f746a6968c54ab2186574e15c3f14f3e7f67aef12b761e043b33b89c5b5f95"}, + {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d110cabad8360ffa0dec8f6ec60e43286e9d251e77db4763a87dcfe55b4adb92"}, + {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0099c7d5d7afff4202a0c670e5b723f7718810000b4abcbc96b064129e64bc7"}, + {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0316e624b754dbbf8c872b62fe6dcb395ef20c70e59890dfa0de9eafccd2849d"}, + {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a5f7ab8baf13314e6b2485965cbacb94afff1e93466ac4d06a47a81c50f9cca"}, + {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c891011e76041e6508cbfc469dd1a8ea09bc24e87e4c204e05f150c4c455a5fa"}, + {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:9208299251370ee815473270c52cd3f7069ee9ed348d941d574d1457d2c73e8b"}, + {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:459f0f32c8356e8125f45eeff0ecf2b1cb6db1551304972702f34cd9e6c44658"}, + {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:14cdc8c1810bbd4b4b9f142eeee23cda528ae4e57ea0923551a9af4820980e39"}, + {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:971aa438a29701d4b34e4943e91b5e984c3ae6ccbf80dd9efaffb01bd0b243a9"}, + {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:9a309c5de392dfe0f32ee57fa43ed8fc6ddf9985425e84bd51ed66bb16bce3a7"}, + {file = "aiohttp-3.10.11-cp38-cp38-win32.whl", hash = "sha256:9ec1628180241d906a0840b38f162a3215114b14541f1a8711c368a8739a9be4"}, + {file = "aiohttp-3.10.11-cp38-cp38-win_amd64.whl", hash = "sha256:9c6e0ffd52c929f985c7258f83185d17c76d4275ad22e90aa29f38e211aacbec"}, + {file = "aiohttp-3.10.11-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cdc493a2e5d8dc79b2df5bec9558425bcd39aff59fc949810cbd0832e294b106"}, + {file = "aiohttp-3.10.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b3e70f24e7d0405be2348da9d5a7836936bf3a9b4fd210f8c37e8d48bc32eca6"}, + {file = "aiohttp-3.10.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:968b8fb2a5eee2770eda9c7b5581587ef9b96fbdf8dcabc6b446d35ccc69df01"}, + {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:deef4362af9493d1382ef86732ee2e4cbc0d7c005947bd54ad1a9a16dd59298e"}, + {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:686b03196976e327412a1b094f4120778c7c4b9cff9bce8d2fdfeca386b89829"}, + {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3bf6d027d9d1d34e1c2e1645f18a6498c98d634f8e373395221121f1c258ace8"}, + {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:099fd126bf960f96d34a760e747a629c27fb3634da5d05c7ef4d35ef4ea519fc"}, + {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c73c4d3dae0b4644bc21e3de546530531d6cdc88659cdeb6579cd627d3c206aa"}, + {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0c5580f3c51eea91559db3facd45d72e7ec970b04528b4709b1f9c2555bd6d0b"}, + {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:fdf6429f0caabfd8a30c4e2eaecb547b3c340e4730ebfe25139779b9815ba138"}, + {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:d97187de3c276263db3564bb9d9fad9e15b51ea10a371ffa5947a5ba93ad6777"}, + {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:0acafb350cfb2eba70eb5d271f55e08bd4502ec35e964e18ad3e7d34d71f7261"}, + {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c13ed0c779911c7998a58e7848954bd4d63df3e3575f591e321b19a2aec8df9f"}, + {file = "aiohttp-3.10.11-cp39-cp39-win32.whl", hash = "sha256:22b7c540c55909140f63ab4f54ec2c20d2635c0289cdd8006da46f3327f971b9"}, + {file = "aiohttp-3.10.11-cp39-cp39-win_amd64.whl", hash = "sha256:7b26b1551e481012575dab8e3727b16fe7dd27eb2711d2e63ced7368756268fb"}, + {file = "aiohttp-3.10.11.tar.gz", hash = "sha256:9dc2b8f3dcab2e39e0fa309c8da50c3b55e6f34ab25f1a71d3288f24924d33a7"}, ] [package.dependencies] aiohappyeyeballs = ">=2.3.0" aiosignal = ">=1.1.2" -async-timeout = {version = ">=4.0,<5.0", markers = "python_version < \"3.11\""} +async-timeout = {version = ">=4.0,<6.0", markers = "python_version < \"3.11\""} attrs = ">=17.3.0" frozenlist = ">=1.1.1" multidict = ">=4.5,<7.0" -yarl = ">=1.0,<2.0" +yarl = ">=1.12.0,<2.0" [package.extras] speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"] @@ -2078,6 +2093,113 @@ files = [ [package.extras] twisted = ["twisted"] +[[package]] +name = "propcache" +version = "0.2.0" +description = "Accelerated property cache" +optional = false +python-versions = ">=3.8" +files = [ + {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c5869b8fd70b81835a6f187c5fdbe67917a04d7e52b6e7cc4e5fe39d55c39d58"}, + {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:952e0d9d07609d9c5be361f33b0d6d650cd2bae393aabb11d9b719364521984b"}, + {file = "propcache-0.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:33ac8f098df0585c0b53009f039dfd913b38c1d2edafed0cedcc0c32a05aa110"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97e48e8875e6c13909c800fa344cd54cc4b2b0db1d5f911f840458a500fde2c2"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:388f3217649d6d59292b722d940d4d2e1e6a7003259eb835724092a1cca0203a"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f571aea50ba5623c308aa146eb650eebf7dbe0fd8c5d946e28343cb3b5aad577"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3dfafb44f7bb35c0c06eda6b2ab4bfd58f02729e7c4045e179f9a861b07c9850"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3ebe9a75be7ab0b7da2464a77bb27febcb4fab46a34f9288f39d74833db7f61"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d2f0d0f976985f85dfb5f3d685697ef769faa6b71993b46b295cdbbd6be8cc37"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:a3dc1a4b165283bd865e8f8cb5f0c64c05001e0718ed06250d8cac9bec115b48"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9e0f07b42d2a50c7dd2d8675d50f7343d998c64008f1da5fef888396b7f84630"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:e63e3e1e0271f374ed489ff5ee73d4b6e7c60710e1f76af5f0e1a6117cd26394"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:56bb5c98f058a41bb58eead194b4db8c05b088c93d94d5161728515bd52b052b"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7665f04d0c7f26ff8bb534e1c65068409bf4687aa2534faf7104d7182debb336"}, + {file = "propcache-0.2.0-cp310-cp310-win32.whl", hash = "sha256:7cf18abf9764746b9c8704774d8b06714bcb0a63641518a3a89c7f85cc02c2ad"}, + {file = "propcache-0.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:cfac69017ef97db2438efb854edf24f5a29fd09a536ff3a992b75990720cdc99"}, + {file = "propcache-0.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:63f13bf09cc3336eb04a837490b8f332e0db41da66995c9fd1ba04552e516354"}, + {file = "propcache-0.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:608cce1da6f2672a56b24a015b42db4ac612ee709f3d29f27a00c943d9e851de"}, + {file = "propcache-0.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:466c219deee4536fbc83c08d09115249db301550625c7fef1c5563a584c9bc87"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc2db02409338bf36590aa985a461b2c96fce91f8e7e0f14c50c5fcc4f229016"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a6ed8db0a556343d566a5c124ee483ae113acc9a557a807d439bcecc44e7dfbb"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:91997d9cb4a325b60d4e3f20967f8eb08dfcb32b22554d5ef78e6fd1dda743a2"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c7dde9e533c0a49d802b4f3f218fa9ad0a1ce21f2c2eb80d5216565202acab4"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffcad6c564fe6b9b8916c1aefbb37a362deebf9394bd2974e9d84232e3e08504"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:97a58a28bcf63284e8b4d7b460cbee1edaab24634e82059c7b8c09e65284f178"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:945db8ee295d3af9dbdbb698cce9bbc5c59b5c3fe328bbc4387f59a8a35f998d"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:39e104da444a34830751715f45ef9fc537475ba21b7f1f5b0f4d71a3b60d7fe2"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:c5ecca8f9bab618340c8e848d340baf68bcd8ad90a8ecd7a4524a81c1764b3db"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:c436130cc779806bdf5d5fae0d848713105472b8566b75ff70048c47d3961c5b"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:191db28dc6dcd29d1a3e063c3be0b40688ed76434622c53a284e5427565bbd9b"}, + {file = "propcache-0.2.0-cp311-cp311-win32.whl", hash = "sha256:5f2564ec89058ee7c7989a7b719115bdfe2a2fb8e7a4543b8d1c0cc4cf6478c1"}, + {file = "propcache-0.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:6e2e54267980349b723cff366d1e29b138b9a60fa376664a157a342689553f71"}, + {file = "propcache-0.2.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:2ee7606193fb267be4b2e3b32714f2d58cad27217638db98a60f9efb5efeccc2"}, + {file = "propcache-0.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:91ee8fc02ca52e24bcb77b234f22afc03288e1dafbb1f88fe24db308910c4ac7"}, + {file = "propcache-0.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2e900bad2a8456d00a113cad8c13343f3b1f327534e3589acc2219729237a2e8"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f52a68c21363c45297aca15561812d542f8fc683c85201df0bebe209e349f793"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e41d67757ff4fbc8ef2af99b338bfb955010444b92929e9e55a6d4dcc3c4f09"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a64e32f8bd94c105cc27f42d3b658902b5bcc947ece3c8fe7bc1b05982f60e89"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:55346705687dbd7ef0d77883ab4f6fabc48232f587925bdaf95219bae072491e"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00181262b17e517df2cd85656fcd6b4e70946fe62cd625b9d74ac9977b64d8d9"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6994984550eaf25dd7fc7bd1b700ff45c894149341725bb4edc67f0ffa94efa4"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:56295eb1e5f3aecd516d91b00cfd8bf3a13991de5a479df9e27dd569ea23959c"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:439e76255daa0f8151d3cb325f6dd4a3e93043e6403e6491813bcaaaa8733887"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f6475a1b2ecb310c98c28d271a30df74f9dd436ee46d09236a6b750a7599ce57"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3444cdba6628accf384e349014084b1cacd866fbb88433cd9d279d90a54e0b23"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4a9d9b4d0a9b38d1c391bb4ad24aa65f306c6f01b512e10a8a34a2dc5675d348"}, + {file = "propcache-0.2.0-cp312-cp312-win32.whl", hash = "sha256:69d3a98eebae99a420d4b28756c8ce6ea5a29291baf2dc9ff9414b42676f61d5"}, + {file = "propcache-0.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:ad9c9b99b05f163109466638bd30ada1722abb01bbb85c739c50b6dc11f92dc3"}, + {file = "propcache-0.2.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ecddc221a077a8132cf7c747d5352a15ed763b674c0448d811f408bf803d9ad7"}, + {file = "propcache-0.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0e53cb83fdd61cbd67202735e6a6687a7b491c8742dfc39c9e01e80354956763"}, + {file = "propcache-0.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:92fe151145a990c22cbccf9ae15cae8ae9eddabfc949a219c9f667877e40853d"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6a21ef516d36909931a2967621eecb256018aeb11fc48656e3257e73e2e247a"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f88a4095e913f98988f5b338c1d4d5d07dbb0b6bad19892fd447484e483ba6b"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5a5b3bb545ead161be780ee85a2b54fdf7092815995661947812dde94a40f6fb"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67aeb72e0f482709991aa91345a831d0b707d16b0257e8ef88a2ad246a7280bf"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c997f8c44ec9b9b0bcbf2d422cc00a1d9b9c681f56efa6ca149a941e5560da2"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2a66df3d4992bc1d725b9aa803e8c5a66c010c65c741ad901e260ece77f58d2f"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:3ebbcf2a07621f29638799828b8d8668c421bfb94c6cb04269130d8de4fb7136"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1235c01ddaa80da8235741e80815ce381c5267f96cc49b1477fdcf8c047ef325"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3947483a381259c06921612550867b37d22e1df6d6d7e8361264b6d037595f44"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d5bed7f9805cc29c780f3aee05de3262ee7ce1f47083cfe9f77471e9d6777e83"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e4a91d44379f45f5e540971d41e4626dacd7f01004826a18cb048e7da7e96544"}, + {file = "propcache-0.2.0-cp313-cp313-win32.whl", hash = "sha256:f902804113e032e2cdf8c71015651c97af6418363bea8d78dc0911d56c335032"}, + {file = "propcache-0.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:8f188cfcc64fb1266f4684206c9de0e80f54622c3f22a910cbd200478aeae61e"}, + {file = "propcache-0.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:53d1bd3f979ed529f0805dd35ddaca330f80a9a6d90bc0121d2ff398f8ed8861"}, + {file = "propcache-0.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:83928404adf8fb3d26793665633ea79b7361efa0287dfbd372a7e74311d51ee6"}, + {file = "propcache-0.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:77a86c261679ea5f3896ec060be9dc8e365788248cc1e049632a1be682442063"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:218db2a3c297a3768c11a34812e63b3ac1c3234c3a086def9c0fee50d35add1f"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7735e82e3498c27bcb2d17cb65d62c14f1100b71723b68362872bca7d0913d90"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:20a617c776f520c3875cf4511e0d1db847a076d720714ae35ffe0df3e440be68"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67b69535c870670c9f9b14a75d28baa32221d06f6b6fa6f77a0a13c5a7b0a5b9"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4569158070180c3855e9c0791c56be3ceeb192defa2cdf6a3f39e54319e56b89"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:db47514ffdbd91ccdc7e6f8407aac4ee94cc871b15b577c1c324236b013ddd04"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:2a60ad3e2553a74168d275a0ef35e8c0a965448ffbc3b300ab3a5bb9956c2162"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:662dd62358bdeaca0aee5761de8727cfd6861432e3bb828dc2a693aa0471a563"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:25a1f88b471b3bc911d18b935ecb7115dff3a192b6fef46f0bfaf71ff4f12418"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:f60f0ac7005b9f5a6091009b09a419ace1610e163fa5deaba5ce3484341840e7"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:74acd6e291f885678631b7ebc85d2d4aec458dd849b8c841b57ef04047833bed"}, + {file = "propcache-0.2.0-cp38-cp38-win32.whl", hash = "sha256:d9b6ddac6408194e934002a69bcaadbc88c10b5f38fb9307779d1c629181815d"}, + {file = "propcache-0.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:676135dcf3262c9c5081cc8f19ad55c8a64e3f7282a21266d05544450bffc3a5"}, + {file = "propcache-0.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:25c8d773a62ce0451b020c7b29a35cfbc05de8b291163a7a0f3b7904f27253e6"}, + {file = "propcache-0.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:375a12d7556d462dc64d70475a9ee5982465fbb3d2b364f16b86ba9135793638"}, + {file = "propcache-0.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1ec43d76b9677637a89d6ab86e1fef70d739217fefa208c65352ecf0282be957"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f45eec587dafd4b2d41ac189c2156461ebd0c1082d2fe7013571598abb8505d1"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bc092ba439d91df90aea38168e11f75c655880c12782facf5cf9c00f3d42b562"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fa1076244f54bb76e65e22cb6910365779d5c3d71d1f18b275f1dfc7b0d71b4d"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:682a7c79a2fbf40f5dbb1eb6bfe2cd865376deeac65acf9beb607505dced9e12"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8e40876731f99b6f3c897b66b803c9e1c07a989b366c6b5b475fafd1f7ba3fb8"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:363ea8cd3c5cb6679f1c2f5f1f9669587361c062e4899fce56758efa928728f8"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:140fbf08ab3588b3468932974a9331aff43c0ab8a2ec2c608b6d7d1756dbb6cb"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:e70fac33e8b4ac63dfc4c956fd7d85a0b1139adcfc0d964ce288b7c527537fea"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:b33d7a286c0dc1a15f5fc864cc48ae92a846df287ceac2dd499926c3801054a6"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:f6d5749fdd33d90e34c2efb174c7e236829147a2713334d708746e94c4bde40d"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:22aa8f2272d81d9317ff5756bb108021a056805ce63dd3630e27d042c8092798"}, + {file = "propcache-0.2.0-cp39-cp39-win32.whl", hash = "sha256:73e4b40ea0eda421b115248d7e79b59214411109a5bc47d0d48e4c73e3b8fcf9"}, + {file = "propcache-0.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:9517d5e9e0731957468c29dbfd0f976736a0e55afaea843726e887f36fe017df"}, + {file = "propcache-0.2.0-py3-none-any.whl", hash = "sha256:2ccc28197af5313706511fab3a8b66dcd6da067a1331372c82ea1cb74285e036"}, + {file = "propcache-0.2.0.tar.gz", hash = "sha256:df81779732feb9d01e5d513fad0122efb3d53bbc75f61b2a4f29a020bc985e70"}, +] + [[package]] name = "psutil" version = "5.9.4" @@ -3307,106 +3429,99 @@ files = [ [[package]] name = "yarl" -version = "1.9.4" +version = "1.17.2" description = "Yet another URL library" optional = false -python-versions = ">=3.7" +python-versions = ">=3.9" files = [ - {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a8c1df72eb746f4136fe9a2e72b0c9dc1da1cbd23b5372f94b5820ff8ae30e0e"}, - {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a3a6ed1d525bfb91b3fc9b690c5a21bb52de28c018530ad85093cc488bee2dd2"}, - {file = "yarl-1.9.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c38c9ddb6103ceae4e4498f9c08fac9b590c5c71b0370f98714768e22ac6fa66"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9e09c9d74f4566e905a0b8fa668c58109f7624db96a2171f21747abc7524234"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8477c1ee4bd47c57d49621a062121c3023609f7a13b8a46953eb6c9716ca392"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5ff2c858f5f6a42c2a8e751100f237c5e869cbde669a724f2062d4c4ef93551"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:357495293086c5b6d34ca9616a43d329317feab7917518bc97a08f9e55648455"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54525ae423d7b7a8ee81ba189f131054defdb122cde31ff17477951464c1691c"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:801e9264d19643548651b9db361ce3287176671fb0117f96b5ac0ee1c3530d53"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e516dc8baf7b380e6c1c26792610230f37147bb754d6426462ab115a02944385"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:7d5aaac37d19b2904bb9dfe12cdb08c8443e7ba7d2852894ad448d4b8f442863"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:54beabb809ffcacbd9d28ac57b0db46e42a6e341a030293fb3185c409e626b8b"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bac8d525a8dbc2a1507ec731d2867025d11ceadcb4dd421423a5d42c56818541"}, - {file = "yarl-1.9.4-cp310-cp310-win32.whl", hash = "sha256:7855426dfbddac81896b6e533ebefc0af2f132d4a47340cee6d22cac7190022d"}, - {file = "yarl-1.9.4-cp310-cp310-win_amd64.whl", hash = "sha256:848cd2a1df56ddbffeb375535fb62c9d1645dde33ca4d51341378b3f5954429b"}, - {file = "yarl-1.9.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:35a2b9396879ce32754bd457d31a51ff0a9d426fd9e0e3c33394bf4b9036b099"}, - {file = "yarl-1.9.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c7d56b293cc071e82532f70adcbd8b61909eec973ae9d2d1f9b233f3d943f2c"}, - {file = "yarl-1.9.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d8a1c6c0be645c745a081c192e747c5de06e944a0d21245f4cf7c05e457c36e0"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b3c1ffe10069f655ea2d731808e76e0f452fc6c749bea04781daf18e6039525"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:549d19c84c55d11687ddbd47eeb348a89df9cb30e1993f1b128f4685cd0ebbf8"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7409f968456111140c1c95301cadf071bd30a81cbd7ab829169fb9e3d72eae9"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e23a6d84d9d1738dbc6e38167776107e63307dfc8ad108e580548d1f2c587f42"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d8b889777de69897406c9fb0b76cdf2fd0f31267861ae7501d93003d55f54fbe"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:03caa9507d3d3c83bca08650678e25364e1843b484f19986a527630ca376ecce"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4e9035df8d0880b2f1c7f5031f33f69e071dfe72ee9310cfc76f7b605958ceb9"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:c0ec0ed476f77db9fb29bca17f0a8fcc7bc97ad4c6c1d8959c507decb22e8572"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:ee04010f26d5102399bd17f8df8bc38dc7ccd7701dc77f4a68c5b8d733406958"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49a180c2e0743d5d6e0b4d1a9e5f633c62eca3f8a86ba5dd3c471060e352ca98"}, - {file = "yarl-1.9.4-cp311-cp311-win32.whl", hash = "sha256:81eb57278deb6098a5b62e88ad8281b2ba09f2f1147c4767522353eaa6260b31"}, - {file = "yarl-1.9.4-cp311-cp311-win_amd64.whl", hash = "sha256:d1d2532b340b692880261c15aee4dc94dd22ca5d61b9db9a8a361953d36410b1"}, - {file = "yarl-1.9.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0d2454f0aef65ea81037759be5ca9947539667eecebca092733b2eb43c965a81"}, - {file = "yarl-1.9.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:44d8ffbb9c06e5a7f529f38f53eda23e50d1ed33c6c869e01481d3fafa6b8142"}, - {file = "yarl-1.9.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aaaea1e536f98754a6e5c56091baa1b6ce2f2700cc4a00b0d49eca8dea471074"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3777ce5536d17989c91696db1d459574e9a9bd37660ea7ee4d3344579bb6f129"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9fc5fc1eeb029757349ad26bbc5880557389a03fa6ada41703db5e068881e5f2"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ea65804b5dc88dacd4a40279af0cdadcfe74b3e5b4c897aa0d81cf86927fee78"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa102d6d280a5455ad6a0f9e6d769989638718e938a6a0a2ff3f4a7ff8c62cc4"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09efe4615ada057ba2d30df871d2f668af661e971dfeedf0c159927d48bbeff0"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:008d3e808d03ef28542372d01057fd09168419cdc8f848efe2804f894ae03e51"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:6f5cb257bc2ec58f437da2b37a8cd48f666db96d47b8a3115c29f316313654ff"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:992f18e0ea248ee03b5a6e8b3b4738850ae7dbb172cc41c966462801cbf62cf7"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:0e9d124c191d5b881060a9e5060627694c3bdd1fe24c5eecc8d5d7d0eb6faabc"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3986b6f41ad22988e53d5778f91855dc0399b043fc8946d4f2e68af22ee9ff10"}, - {file = "yarl-1.9.4-cp312-cp312-win32.whl", hash = "sha256:4b21516d181cd77ebd06ce160ef8cc2a5e9ad35fb1c5930882baff5ac865eee7"}, - {file = "yarl-1.9.4-cp312-cp312-win_amd64.whl", hash = "sha256:a9bd00dc3bc395a662900f33f74feb3e757429e545d831eef5bb280252631984"}, - {file = "yarl-1.9.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:63b20738b5aac74e239622d2fe30df4fca4942a86e31bf47a81a0e94c14df94f"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d7f7de27b8944f1fee2c26a88b4dabc2409d2fea7a9ed3df79b67277644e17"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c74018551e31269d56fab81a728f683667e7c28c04e807ba08f8c9e3bba32f14"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ca06675212f94e7a610e85ca36948bb8fc023e458dd6c63ef71abfd482481aa5"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5aef935237d60a51a62b86249839b51345f47564208c6ee615ed2a40878dccdd"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b134fd795e2322b7684155b7855cc99409d10b2e408056db2b93b51a52accc7"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d25039a474c4c72a5ad4b52495056f843a7ff07b632c1b92ea9043a3d9950f6e"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f7d6b36dd2e029b6bcb8a13cf19664c7b8e19ab3a58e0fefbb5b8461447ed5ec"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:957b4774373cf6f709359e5c8c4a0af9f6d7875db657adb0feaf8d6cb3c3964c"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:d7eeb6d22331e2fd42fce928a81c697c9ee2d51400bd1a28803965883e13cead"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6a962e04b8f91f8c4e5917e518d17958e3bdee71fd1d8b88cdce74dd0ebbf434"}, - {file = "yarl-1.9.4-cp37-cp37m-win32.whl", hash = "sha256:f3bc6af6e2b8f92eced34ef6a96ffb248e863af20ef4fde9448cc8c9b858b749"}, - {file = "yarl-1.9.4-cp37-cp37m-win_amd64.whl", hash = "sha256:ad4d7a90a92e528aadf4965d685c17dacff3df282db1121136c382dc0b6014d2"}, - {file = "yarl-1.9.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ec61d826d80fc293ed46c9dd26995921e3a82146feacd952ef0757236fc137be"}, - {file = "yarl-1.9.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8be9e837ea9113676e5754b43b940b50cce76d9ed7d2461df1af39a8ee674d9f"}, - {file = "yarl-1.9.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bef596fdaa8f26e3d66af846bbe77057237cb6e8efff8cd7cc8dff9a62278bbf"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d47552b6e52c3319fede1b60b3de120fe83bde9b7bddad11a69fb0af7db32f1"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84fc30f71689d7fc9168b92788abc977dc8cefa806909565fc2951d02f6b7d57"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4aa9741085f635934f3a2583e16fcf62ba835719a8b2b28fb2917bb0537c1dfa"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:206a55215e6d05dbc6c98ce598a59e6fbd0c493e2de4ea6cc2f4934d5a18d130"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07574b007ee20e5c375a8fe4a0789fad26db905f9813be0f9fef5a68080de559"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5a2e2433eb9344a163aced6a5f6c9222c0786e5a9e9cac2c89f0b28433f56e23"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:6ad6d10ed9b67a382b45f29ea028f92d25bc0bc1daf6c5b801b90b5aa70fb9ec"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:6fe79f998a4052d79e1c30eeb7d6c1c1056ad33300f682465e1b4e9b5a188b78"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a825ec844298c791fd28ed14ed1bffc56a98d15b8c58a20e0e08c1f5f2bea1be"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8619d6915b3b0b34420cf9b2bb6d81ef59d984cb0fde7544e9ece32b4b3043c3"}, - {file = "yarl-1.9.4-cp38-cp38-win32.whl", hash = "sha256:686a0c2f85f83463272ddffd4deb5e591c98aac1897d65e92319f729c320eece"}, - {file = "yarl-1.9.4-cp38-cp38-win_amd64.whl", hash = "sha256:a00862fb23195b6b8322f7d781b0dc1d82cb3bcac346d1e38689370cc1cc398b"}, - {file = "yarl-1.9.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:604f31d97fa493083ea21bd9b92c419012531c4e17ea6da0f65cacdcf5d0bd27"}, - {file = "yarl-1.9.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8a854227cf581330ffa2c4824d96e52ee621dd571078a252c25e3a3b3d94a1b1"}, - {file = "yarl-1.9.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ba6f52cbc7809cd8d74604cce9c14868306ae4aa0282016b641c661f981a6e91"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a6327976c7c2f4ee6816eff196e25385ccc02cb81427952414a64811037bbc8b"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8397a3817d7dcdd14bb266283cd1d6fc7264a48c186b986f32e86d86d35fbac5"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e0381b4ce23ff92f8170080c97678040fc5b08da85e9e292292aba67fdac6c34"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23d32a2594cb5d565d358a92e151315d1b2268bc10f4610d098f96b147370136"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ddb2a5c08a4eaaba605340fdee8fc08e406c56617566d9643ad8bf6852778fc7"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:26a1dc6285e03f3cc9e839a2da83bcbf31dcb0d004c72d0730e755b33466c30e"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:18580f672e44ce1238b82f7fb87d727c4a131f3a9d33a5e0e82b793362bf18b4"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:29e0f83f37610f173eb7e7b5562dd71467993495e568e708d99e9d1944f561ec"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:1f23e4fe1e8794f74b6027d7cf19dc25f8b63af1483d91d595d4a07eca1fb26c"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:db8e58b9d79200c76956cefd14d5c90af54416ff5353c5bfd7cbe58818e26ef0"}, - {file = "yarl-1.9.4-cp39-cp39-win32.whl", hash = "sha256:c7224cab95645c7ab53791022ae77a4509472613e839dab722a72abe5a684575"}, - {file = "yarl-1.9.4-cp39-cp39-win_amd64.whl", hash = "sha256:824d6c50492add5da9374875ce72db7a0733b29c2394890aef23d533106e2b15"}, - {file = "yarl-1.9.4-py3-none-any.whl", hash = "sha256:928cecb0ef9d5a7946eb6ff58417ad2fe9375762382f1bf5c55e61645f2c43ad"}, - {file = "yarl-1.9.4.tar.gz", hash = "sha256:566db86717cf8080b99b58b083b773a908ae40f06681e87e589a976faf8246bf"}, + {file = "yarl-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:93771146ef048b34201bfa382c2bf74c524980870bb278e6df515efaf93699ff"}, + {file = "yarl-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8281db240a1616af2f9c5f71d355057e73a1409c4648c8949901396dc0a3c151"}, + {file = "yarl-1.17.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:170ed4971bf9058582b01a8338605f4d8c849bd88834061e60e83b52d0c76870"}, + {file = "yarl-1.17.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc61b005f6521fcc00ca0d1243559a5850b9dd1e1fe07b891410ee8fe192d0c0"}, + {file = "yarl-1.17.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:871e1b47eec7b6df76b23c642a81db5dd6536cbef26b7e80e7c56c2fd371382e"}, + {file = "yarl-1.17.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3a58a2f2ca7aaf22b265388d40232f453f67a6def7355a840b98c2d547bd037f"}, + {file = "yarl-1.17.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:736bb076f7299c5c55dfef3eb9e96071a795cb08052822c2bb349b06f4cb2e0a"}, + {file = "yarl-1.17.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8fd51299e21da709eabcd5b2dd60e39090804431292daacbee8d3dabe39a6bc0"}, + {file = "yarl-1.17.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:358dc7ddf25e79e1cc8ee16d970c23faee84d532b873519c5036dbb858965795"}, + {file = "yarl-1.17.2-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:50d866f7b1a3f16f98603e095f24c0eeba25eb508c85a2c5939c8b3870ba2df8"}, + {file = "yarl-1.17.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:8b9c4643e7d843a0dca9cd9d610a0876e90a1b2cbc4c5ba7930a0d90baf6903f"}, + {file = "yarl-1.17.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:d63123bfd0dce5f91101e77c8a5427c3872501acece8c90df457b486bc1acd47"}, + {file = "yarl-1.17.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:4e76381be3d8ff96a4e6c77815653063e87555981329cf8f85e5be5abf449021"}, + {file = "yarl-1.17.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:734144cd2bd633a1516948e477ff6c835041c0536cef1d5b9a823ae29899665b"}, + {file = "yarl-1.17.2-cp310-cp310-win32.whl", hash = "sha256:26bfb6226e0c157af5da16d2d62258f1ac578d2899130a50433ffee4a5dfa673"}, + {file = "yarl-1.17.2-cp310-cp310-win_amd64.whl", hash = "sha256:76499469dcc24759399accd85ec27f237d52dec300daaca46a5352fcbebb1071"}, + {file = "yarl-1.17.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:792155279dc093839e43f85ff7b9b6493a8eaa0af1f94f1f9c6e8f4de8c63500"}, + {file = "yarl-1.17.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:38bc4ed5cae853409cb193c87c86cd0bc8d3a70fd2268a9807217b9176093ac6"}, + {file = "yarl-1.17.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4a8c83f6fcdc327783bdc737e8e45b2e909b7bd108c4da1892d3bc59c04a6d84"}, + {file = "yarl-1.17.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c6d5fed96f0646bfdf698b0a1cebf32b8aae6892d1bec0c5d2d6e2df44e1e2d"}, + {file = "yarl-1.17.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:782ca9c58f5c491c7afa55518542b2b005caedaf4685ec814fadfcee51f02493"}, + {file = "yarl-1.17.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ff6af03cac0d1a4c3c19e5dcc4c05252411bf44ccaa2485e20d0a7c77892ab6e"}, + {file = "yarl-1.17.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a3f47930fbbed0f6377639503848134c4aa25426b08778d641491131351c2c8"}, + {file = "yarl-1.17.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1fa68a3c921365c5745b4bd3af6221ae1f0ea1bf04b69e94eda60e57958907f"}, + {file = "yarl-1.17.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:187df91395c11e9f9dc69b38d12406df85aa5865f1766a47907b1cc9855b6303"}, + {file = "yarl-1.17.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:93d1c8cc5bf5df401015c5e2a3ce75a5254a9839e5039c881365d2a9dcfc6dc2"}, + {file = "yarl-1.17.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:11d86c6145ac5c706c53d484784cf504d7d10fa407cb73b9d20f09ff986059ef"}, + {file = "yarl-1.17.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:c42774d1d1508ec48c3ed29e7b110e33f5e74a20957ea16197dbcce8be6b52ba"}, + {file = "yarl-1.17.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:0c8e589379ef0407b10bed16cc26e7392ef8f86961a706ade0a22309a45414d7"}, + {file = "yarl-1.17.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1056cadd5e850a1c026f28e0704ab0a94daaa8f887ece8dfed30f88befb87bb0"}, + {file = "yarl-1.17.2-cp311-cp311-win32.whl", hash = "sha256:be4c7b1c49d9917c6e95258d3d07f43cfba2c69a6929816e77daf322aaba6628"}, + {file = "yarl-1.17.2-cp311-cp311-win_amd64.whl", hash = "sha256:ac8eda86cc75859093e9ce390d423aba968f50cf0e481e6c7d7d63f90bae5c9c"}, + {file = "yarl-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:dd90238d3a77a0e07d4d6ffdebc0c21a9787c5953a508a2231b5f191455f31e9"}, + {file = "yarl-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c74f0b0472ac40b04e6d28532f55cac8090e34c3e81f118d12843e6df14d0909"}, + {file = "yarl-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4d486ddcaca8c68455aa01cf53d28d413fb41a35afc9f6594a730c9779545876"}, + {file = "yarl-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f25b7e93f5414b9a983e1a6c1820142c13e1782cc9ed354c25e933aebe97fcf2"}, + {file = "yarl-1.17.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3a0baff7827a632204060f48dca9e63fbd6a5a0b8790c1a2adfb25dc2c9c0d50"}, + {file = "yarl-1.17.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:460024cacfc3246cc4d9f47a7fc860e4fcea7d1dc651e1256510d8c3c9c7cde0"}, + {file = "yarl-1.17.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5870d620b23b956f72bafed6a0ba9a62edb5f2ef78a8849b7615bd9433384171"}, + {file = "yarl-1.17.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2941756754a10e799e5b87e2319bbec481ed0957421fba0e7b9fb1c11e40509f"}, + {file = "yarl-1.17.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9611b83810a74a46be88847e0ea616794c406dbcb4e25405e52bff8f4bee2d0a"}, + {file = "yarl-1.17.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:cd7e35818d2328b679a13268d9ea505c85cd773572ebb7a0da7ccbca77b6a52e"}, + {file = "yarl-1.17.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6b981316fcd940f085f646b822c2ff2b8b813cbd61281acad229ea3cbaabeb6b"}, + {file = "yarl-1.17.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:688058e89f512fb7541cb85c2f149c292d3fa22f981d5a5453b40c5da49eb9e8"}, + {file = "yarl-1.17.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:56afb44a12b0864d17b597210d63a5b88915d680f6484d8d202ed68ade38673d"}, + {file = "yarl-1.17.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:17931dfbb84ae18b287279c1f92b76a3abcd9a49cd69b92e946035cff06bcd20"}, + {file = "yarl-1.17.2-cp312-cp312-win32.whl", hash = "sha256:ff8d95e06546c3a8c188f68040e9d0360feb67ba8498baf018918f669f7bc39b"}, + {file = "yarl-1.17.2-cp312-cp312-win_amd64.whl", hash = "sha256:4c840cc11163d3c01a9d8aad227683c48cd3e5be5a785921bcc2a8b4b758c4f3"}, + {file = "yarl-1.17.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:3294f787a437cb5d81846de3a6697f0c35ecff37a932d73b1fe62490bef69211"}, + {file = "yarl-1.17.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f1e7fedb09c059efee2533119666ca7e1a2610072076926fa028c2ba5dfeb78c"}, + {file = "yarl-1.17.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:da9d3061e61e5ae3f753654813bc1cd1c70e02fb72cf871bd6daf78443e9e2b1"}, + {file = "yarl-1.17.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91c012dceadc695ccf69301bfdccd1fc4472ad714fe2dd3c5ab4d2046afddf29"}, + {file = "yarl-1.17.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f11fd61d72d93ac23718d393d2a64469af40be2116b24da0a4ca6922df26807e"}, + {file = "yarl-1.17.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:46c465ad06971abcf46dd532f77560181387b4eea59084434bdff97524444032"}, + {file = "yarl-1.17.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef6eee1a61638d29cd7c85f7fd3ac7b22b4c0fabc8fd00a712b727a3e73b0685"}, + {file = "yarl-1.17.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4434b739a8a101a837caeaa0137e0e38cb4ea561f39cb8960f3b1e7f4967a3fc"}, + {file = "yarl-1.17.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:752485cbbb50c1e20908450ff4f94217acba9358ebdce0d8106510859d6eb19a"}, + {file = "yarl-1.17.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:17791acaa0c0f89323c57da7b9a79f2174e26d5debbc8c02d84ebd80c2b7bff8"}, + {file = "yarl-1.17.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5c6ea72fe619fee5e6b5d4040a451d45d8175f560b11b3d3e044cd24b2720526"}, + {file = "yarl-1.17.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:db5ac3871ed76340210fe028f535392f097fb31b875354bcb69162bba2632ef4"}, + {file = "yarl-1.17.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:7a1606ba68e311576bcb1672b2a1543417e7e0aa4c85e9e718ba6466952476c0"}, + {file = "yarl-1.17.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9bc27dd5cfdbe3dc7f381b05e6260ca6da41931a6e582267d5ca540270afeeb2"}, + {file = "yarl-1.17.2-cp313-cp313-win32.whl", hash = "sha256:52492b87d5877ec405542f43cd3da80bdcb2d0c2fbc73236526e5f2c28e6db28"}, + {file = "yarl-1.17.2-cp313-cp313-win_amd64.whl", hash = "sha256:8e1bf59e035534ba4077f5361d8d5d9194149f9ed4f823d1ee29ef3e8964ace3"}, + {file = "yarl-1.17.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c556fbc6820b6e2cda1ca675c5fa5589cf188f8da6b33e9fc05b002e603e44fa"}, + {file = "yarl-1.17.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f2f44a4247461965fed18b2573f3a9eb5e2c3cad225201ee858726cde610daca"}, + {file = "yarl-1.17.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3a3ede8c248f36b60227eb777eac1dbc2f1022dc4d741b177c4379ca8e75571a"}, + {file = "yarl-1.17.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2654caaf5584449d49c94a6b382b3cb4a246c090e72453493ea168b931206a4d"}, + {file = "yarl-1.17.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0d41c684f286ce41fa05ab6af70f32d6da1b6f0457459a56cf9e393c1c0b2217"}, + {file = "yarl-1.17.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2270d590997445a0dc29afa92e5534bfea76ba3aea026289e811bf9ed4b65a7f"}, + {file = "yarl-1.17.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18662443c6c3707e2fc7fad184b4dc32dd428710bbe72e1bce7fe1988d4aa654"}, + {file = "yarl-1.17.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:75ac158560dec3ed72f6d604c81090ec44529cfb8169b05ae6fcb3e986b325d9"}, + {file = "yarl-1.17.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1fee66b32e79264f428dc8da18396ad59cc48eef3c9c13844adec890cd339db5"}, + {file = "yarl-1.17.2-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:585ce7cd97be8f538345de47b279b879e091c8b86d9dbc6d98a96a7ad78876a3"}, + {file = "yarl-1.17.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:c019abc2eca67dfa4d8fb72ba924871d764ec3c92b86d5b53b405ad3d6aa56b0"}, + {file = "yarl-1.17.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:c6e659b9a24d145e271c2faf3fa6dd1fcb3e5d3f4e17273d9e0350b6ab0fe6e2"}, + {file = "yarl-1.17.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:d17832ba39374134c10e82d137e372b5f7478c4cceeb19d02ae3e3d1daed8721"}, + {file = "yarl-1.17.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:bc3003710e335e3f842ae3fd78efa55f11a863a89a72e9a07da214db3bf7e1f8"}, + {file = "yarl-1.17.2-cp39-cp39-win32.whl", hash = "sha256:f5ffc6b7ace5b22d9e73b2a4c7305740a339fbd55301d52735f73e21d9eb3130"}, + {file = "yarl-1.17.2-cp39-cp39-win_amd64.whl", hash = "sha256:48e424347a45568413deec6f6ee2d720de2cc0385019bedf44cd93e8638aa0ed"}, + {file = "yarl-1.17.2-py3-none-any.whl", hash = "sha256:dd7abf4f717e33b7487121faf23560b3a50924f80e4bef62b22dab441ded8f3b"}, + {file = "yarl-1.17.2.tar.gz", hash = "sha256:753eaaa0c7195244c84b5cc159dc8204b7fd99f716f11198f999f2332a86b178"}, ] [package.dependencies] idna = ">=2.0" multidict = ">=4.0" +propcache = ">=0.2.0" [[package]] name = "zipp" @@ -3484,4 +3599,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "c656496f9fbb7c29b2df3143c1d72c95b5e121cb6340134c0b8d070f54a08508" +content-hash = "8cb9c38d83eec441391c0528ac2fbefde18c734373b2399e07c69382044e8ced" diff --git a/pyproject.toml b/pyproject.toml index 9ea42bf46f..197946fff8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ psutil = "^5.9.4" types-psutil = "^5.9.5.12" types-toml = "^0.10.8.6" pytest-httpserver = "^1.0.8" -aiohttp = "3.10.2" +aiohttp = "3.10.11" pytest-rerunfailures = "^13.0" types-pytest-lazy-fixture = "^0.6.3.3" pytest-split = "^0.8.1" From 5e3fbef7210a84870cb012837db6830aeab3d38d Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Tue, 19 Nov 2024 14:10:09 -0500 Subject: [PATCH 08/22] fix(pageserver): queue stopped error should be ignored during create timeline (#9767) close https://github.com/neondatabase/neon/issues/9730 The test case tests if anything goes wrong during pageserver restart + *during timeline creation not complete*. Therefore, queue is stopped error is normal in this case, except that it should be categorized as a shutdown error instead of a real error. ## Summary of changes * More comments for the test case. * Queue stopped error will now be forwarded as CreateTimelineError::ShuttingDown. --------- Signed-off-by: Alex Chi Z --- pageserver/src/tenant.rs | 6 ++++++ test_runner/regress/test_tenants.py | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index e88dee7c6c..46317e93ee 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -2446,6 +2446,12 @@ impl Tenant { .remote_client .wait_completion() .await + .map_err(|e| match e { + WaitCompletionError::NotInitialized( + e, // If the queue is already stopped, it's a shutdown error. + ) if e.is_stopping() => CreateTimelineError::ShuttingDown, + e => CreateTimelineError::Other(e.into()), + }) .context("wait for timeline initial uploads to complete")?; // The creating task is responsible for activating the timeline. diff --git a/test_runner/regress/test_tenants.py b/test_runner/regress/test_tenants.py index 5a499ea98b..158c3fddb0 100644 --- a/test_runner/regress/test_tenants.py +++ b/test_runner/regress/test_tenants.py @@ -369,12 +369,16 @@ def test_create_churn_during_restart(neon_env_builder: NeonEnvBuilder): - Bad response codes during shutdown (e.g. returning 500 instead of 503) - Issues where a tenant is still starting up while we receive a request for it - Issues with interrupting/resuming tenant/timeline creation in shutdown + - Issues with a timeline is not created successfully because of restart. """ env = neon_env_builder.init_configs() env.start() tenant_id: TenantId = env.initial_tenant timeline_id = env.initial_timeline + # At this point, the initial tenant/timeline might not have been created successfully, + # and this is the case we want to test. + # Multiple creation requests which race will generate this error on the pageserver # and storage controller respectively env.pageserver.allowed_errors.append(".*Conflict: Tenant is already being modified.*") From b092126c94fc2af37188ad05e5951ae10c84813a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Tue, 19 Nov 2024 20:10:53 +0100 Subject: [PATCH 09/22] scrubber: fix parsing issue with Azure (#9797) Apparently Azure returns timelines ending with `/` which confuses the parsing. So remove all trailing `/`s before attempting to parse. Part of https://github.com/neondatabase/cloud/issues/19963 --- storage_scrubber/src/metadata_stream.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/storage_scrubber/src/metadata_stream.rs b/storage_scrubber/src/metadata_stream.rs index f896cff2d5..efda7c213d 100644 --- a/storage_scrubber/src/metadata_stream.rs +++ b/storage_scrubber/src/metadata_stream.rs @@ -60,7 +60,7 @@ pub async fn stream_tenant_shards<'a>( first_part .parse::() - .with_context(|| format!("Incorrect entry id str: {first_part}")) + .with_context(|| format!("Incorrect tenant entry id str: {first_part}")) }) .collect::>(); @@ -114,9 +114,10 @@ pub async fn stream_tenant_timelines<'a>( prefix.get_path().as_str().strip_prefix(prefix_str) }) .map(|entry_id_str| { - entry_id_str + let first_part = entry_id_str.split('/').next().unwrap(); + first_part .parse::() - .with_context(|| format!("Incorrect entry id str: {entry_id_str}")) + .with_context(|| format!("Incorrect timeline entry id str: {entry_id_str}")) }); for i in new_entry_ids { From b22a84a7bf2ccae30243be81439cc284835a37f1 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Tue, 19 Nov 2024 14:38:41 -0500 Subject: [PATCH 10/22] feat(pageserver): support key range for manual compaction trigger (#9723) part of https://github.com/neondatabase/neon/issues/9114, we want to be able to run partial gc-compaction in tests. In the future, we can also expand this functionality to legacy compaction, so that we can trigger compaction for a specific key range. ## Summary of changes * Support passing compaction key range through pageserver routes. * Refactor input parameters of compact related function to take the new `CompactOptions`. * Add tests for partial compaction. Note that the test may or may not trigger compaction based on GC horizon. We need to improve the test case to ensure things always get below the gc_horizon and the gc-compaction can be triggered. --------- Signed-off-by: Alex Chi Z --- libs/utils/src/http/json.rs | 22 ++++++++++ pageserver/src/http/routes.rs | 15 +++++-- pageserver/src/tenant.rs | 42 ++++++++++++------- pageserver/src/tenant/timeline.rs | 36 +++++++++++++++- pageserver/src/tenant/timeline/compaction.rs | 43 +++++++++++++++----- test_runner/fixtures/pageserver/http.py | 2 + test_runner/regress/test_compaction.py | 39 ++++++++++++++++++ 7 files changed, 170 insertions(+), 29 deletions(-) diff --git a/libs/utils/src/http/json.rs b/libs/utils/src/http/json.rs index 6c25440b42..e53231f313 100644 --- a/libs/utils/src/http/json.rs +++ b/libs/utils/src/http/json.rs @@ -5,6 +5,7 @@ use serde::{Deserialize, Serialize}; use super::error::ApiError; +/// Parse a json request body and deserialize it to the type `T`. pub async fn json_request Deserialize<'de>>( request: &mut Request, ) -> Result { @@ -27,6 +28,27 @@ pub async fn json_request Deserialize<'de>>( .map_err(ApiError::BadRequest) } +/// Parse a json request body and deserialize it to the type `T`. If the body is empty, return `T::default`. +pub async fn json_request_maybe Deserialize<'de> + Default>( + request: &mut Request, +) -> Result { + let body = hyper::body::aggregate(request.body_mut()) + .await + .context("Failed to read request body") + .map_err(ApiError::BadRequest)?; + + if body.remaining() == 0 { + return Ok(T::default()); + } + + let mut deser = serde_json::de::Deserializer::from_reader(body.reader()); + + serde_path_to_error::deserialize(&mut deser) + // intentionally stringify because the debug version is not helpful in python logs + .map_err(|e| anyhow::anyhow!("Failed to parse json request: {e}")) + .map_err(ApiError::BadRequest) +} + pub fn json_response( status: StatusCode, data: T, diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index ab170679ba..306b0f35ab 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -83,6 +83,8 @@ use crate::tenant::storage_layer::LayerName; use crate::tenant::timeline::offload::offload_timeline; use crate::tenant::timeline::offload::OffloadError; use crate::tenant::timeline::CompactFlags; +use crate::tenant::timeline::CompactOptions; +use crate::tenant::timeline::CompactRange; use crate::tenant::timeline::CompactionError; use crate::tenant::timeline::Timeline; use crate::tenant::GetTimelineError; @@ -100,7 +102,7 @@ use utils::{ http::{ endpoint::{self, attach_openapi_ui, auth_middleware, check_permission_with}, error::{ApiError, HttpErrorBody}, - json::{json_request, json_response}, + json::{json_request, json_request_maybe, json_response}, request::parse_request_param, RequestExt, RouterBuilder, }, @@ -1927,13 +1929,15 @@ async fn timeline_gc_handler( // Run compaction immediately on given timeline. async fn timeline_compact_handler( - request: Request, + mut request: Request, cancel: CancellationToken, ) -> Result, ApiError> { let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?; let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; check_permission(&request, Some(tenant_shard_id.tenant_id))?; + let compact_range = json_request_maybe::>(&mut request).await?; + let state = get_state(&request); let mut flags = EnumSet::empty(); @@ -1957,11 +1961,16 @@ async fn timeline_compact_handler( let wait_until_uploaded = parse_query_param::<_, bool>(&request, "wait_until_uploaded")?.unwrap_or(false); + let options = CompactOptions { + compact_range, + flags, + }; + async { let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?; timeline - .compact(&cancel, flags, &ctx) + .compact_with_options(&cancel, options, &ctx) .await .map_err(|e| ApiError::InternalServerError(e.into()))?; if wait_until_uploaded { diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 46317e93ee..37bf83c984 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -5254,7 +5254,7 @@ mod tests { use storage_layer::PersistentLayerKey; use tests::storage_layer::ValuesReconstructState; use tests::timeline::{GetVectoredError, ShutdownMode}; - use timeline::DeltaLayerTestDesc; + use timeline::{CompactOptions, DeltaLayerTestDesc}; use utils::id::TenantId; #[cfg(feature = "testing")] @@ -7728,7 +7728,7 @@ mod tests { let cancel = CancellationToken::new(); tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); @@ -7805,7 +7805,7 @@ mod tests { guard.cutoffs.space = Lsn(0x40); } tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); @@ -8237,7 +8237,7 @@ mod tests { let cancel = CancellationToken::new(); tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); @@ -8266,7 +8266,7 @@ mod tests { guard.cutoffs.space = Lsn(0x40); } tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); @@ -8819,7 +8819,14 @@ mod tests { dryrun_flags.insert(CompactFlags::DryRun); tline - .compact_with_gc(&cancel, dryrun_flags, &ctx) + .compact_with_gc( + &cancel, + CompactOptions { + flags: dryrun_flags, + compact_range: None, + }, + &ctx, + ) .await .unwrap(); // We expect layer map to be the same b/c the dry run flag, but we don't know whether there will be other background jobs @@ -8827,14 +8834,14 @@ mod tests { verify_result().await; tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); verify_result().await; // compact again tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); verify_result().await; @@ -8847,14 +8854,14 @@ mod tests { guard.cutoffs.space = Lsn(0x38); } tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); verify_result().await; // no wals between 0x30 and 0x38, so we should obtain the same result // not increasing the GC horizon and compact again tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); verify_result().await; @@ -9048,7 +9055,14 @@ mod tests { dryrun_flags.insert(CompactFlags::DryRun); tline - .compact_with_gc(&cancel, dryrun_flags, &ctx) + .compact_with_gc( + &cancel, + CompactOptions { + flags: dryrun_flags, + compact_range: None, + }, + &ctx, + ) .await .unwrap(); // We expect layer map to be the same b/c the dry run flag, but we don't know whether there will be other background jobs @@ -9056,14 +9070,14 @@ mod tests { verify_result().await; tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); verify_result().await; // compact again tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); verify_result().await; @@ -9248,7 +9262,7 @@ mod tests { let cancel = CancellationToken::new(); branch_tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 5547bc2c7a..0eb3de21e9 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -774,6 +774,21 @@ pub(crate) enum CompactFlags { DryRun, } +#[serde_with::serde_as] +#[derive(Debug, Clone, serde::Deserialize)] +pub(crate) struct CompactRange { + #[serde_as(as = "serde_with::DisplayFromStr")] + pub start: Key, + #[serde_as(as = "serde_with::DisplayFromStr")] + pub end: Key, +} + +#[derive(Clone, Default)] +pub(crate) struct CompactOptions { + pub flags: EnumSet, + pub compact_range: Option, +} + impl std::fmt::Debug for Timeline { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "Timeline<{}>", self.timeline_id) @@ -1612,6 +1627,25 @@ impl Timeline { cancel: &CancellationToken, flags: EnumSet, ctx: &RequestContext, + ) -> Result { + self.compact_with_options( + cancel, + CompactOptions { + flags, + compact_range: None, + }, + ctx, + ) + .await + } + + /// Outermost timeline compaction operation; downloads needed layers. Returns whether we have pending + /// compaction tasks. + pub(crate) async fn compact_with_options( + self: &Arc, + cancel: &CancellationToken, + options: CompactOptions, + ctx: &RequestContext, ) -> Result { // most likely the cancellation token is from background task, but in tests it could be the // request task as well. @@ -1649,7 +1683,7 @@ impl Timeline { self.compact_tiered(cancel, ctx).await?; Ok(false) } - CompactionAlgorithm::Legacy => self.compact_legacy(cancel, flags, ctx).await, + CompactionAlgorithm::Legacy => self.compact_legacy(cancel, options, ctx).await, } } diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index b30e380de5..ecd68ba55e 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -10,7 +10,7 @@ use std::sync::Arc; use super::layer_manager::LayerManager; use super::{ - CompactFlags, CreateImageLayersError, DurationRecorder, ImageLayerCreationMode, + CompactFlags, CompactOptions, CreateImageLayersError, DurationRecorder, ImageLayerCreationMode, RecordedDuration, Timeline, }; @@ -273,22 +273,32 @@ impl Timeline { pub(crate) async fn compact_legacy( self: &Arc, cancel: &CancellationToken, - flags: EnumSet, + options: CompactOptions, ctx: &RequestContext, ) -> Result { - if flags.contains(CompactFlags::EnhancedGcBottomMostCompaction) { - self.compact_with_gc(cancel, flags, ctx) + if options + .flags + .contains(CompactFlags::EnhancedGcBottomMostCompaction) + { + self.compact_with_gc(cancel, options, ctx) .await .map_err(CompactionError::Other)?; return Ok(false); } - if flags.contains(CompactFlags::DryRun) { + if options.flags.contains(CompactFlags::DryRun) { return Err(CompactionError::Other(anyhow!( "dry-run mode is not supported for legacy compaction for now" ))); } + if options.compact_range.is_some() { + // maybe useful in the future? could implement this at some point + return Err(CompactionError::Other(anyhow!( + "compaction range is not supported for legacy compaction for now" + ))); + } + // High level strategy for compaction / image creation: // // 1. First, calculate the desired "partitioning" of the @@ -338,7 +348,7 @@ impl Timeline { .repartition( self.get_last_record_lsn(), self.get_compaction_target_size(), - flags, + options.flags, ctx, ) .await @@ -354,7 +364,7 @@ impl Timeline { let fully_compacted = self .compact_level0( target_file_size, - flags.contains(CompactFlags::ForceL0Compaction), + options.flags.contains(CompactFlags::ForceL0Compaction), ctx, ) .await?; @@ -372,7 +382,10 @@ impl Timeline { .create_image_layers( &partitioning, lsn, - if flags.contains(CompactFlags::ForceImageLayerCreation) { + if options + .flags + .contains(CompactFlags::ForceImageLayerCreation) + { ImageLayerCreationMode::Force } else { ImageLayerCreationMode::Try @@ -1736,11 +1749,19 @@ impl Timeline { pub(crate) async fn compact_with_gc( self: &Arc, cancel: &CancellationToken, - flags: EnumSet, + options: CompactOptions, ctx: &RequestContext, ) -> anyhow::Result<()> { - self.partial_compact_with_gc(Key::MIN..Key::MAX, cancel, flags, ctx) - .await + self.partial_compact_with_gc( + options + .compact_range + .map(|range| range.start..range.end) + .unwrap_or_else(|| Key::MIN..Key::MAX), + cancel, + options.flags, + ctx, + ) + .await } /// An experimental compaction building block that combines compaction with garbage collection. diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index d1a9b5921a..01583757fa 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -665,6 +665,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter): force_l0_compaction=False, wait_until_uploaded=False, enhanced_gc_bottom_most_compaction=False, + body: Optional[dict[str, Any]] = None, ): self.is_testing_enabled_or_skip() query = {} @@ -683,6 +684,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter): res = self.put( f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/compact", params=query, + json=body, ) log.info(f"Got compact request response code: {res.status_code}") self.verbose_error(res) diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py index a02d0f6b98..48950a5a50 100644 --- a/test_runner/regress/test_compaction.py +++ b/test_runner/regress/test_compaction.py @@ -116,6 +116,45 @@ page_cache_size=10 assert vectored_average < 8 +def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder): + env = neon_env_builder.init_start(initial_tenant_conf=AGGRESIVE_COMPACTION_TENANT_CONF) + + tenant_id = env.initial_tenant + timeline_id = env.initial_timeline + + row_count = 1000 + churn_rounds = 10 + + ps_http = env.pageserver.http_client() + + workload = Workload(env, tenant_id, timeline_id) + workload.init(env.pageserver.id) + + log.info("Writing initial data ...") + workload.write_rows(row_count, env.pageserver.id) + + for i in range(1, churn_rounds + 1): + if i % 10 == 0: + log.info(f"Running churn round {i}/{churn_rounds} ...") + + workload.churn_rows(row_count, env.pageserver.id) + # Force L0 compaction to ensure the number of layers is within bounds, so that gc-compaction can run. + ps_http.timeline_compact(tenant_id, timeline_id, force_l0_compaction=True) + assert ps_http.perf_info(tenant_id, timeline_id)[0]["num_of_l0"] <= 1 + ps_http.timeline_compact( + tenant_id, + timeline_id, + enhanced_gc_bottom_most_compaction=True, + body={ + "start": "000000000000000000000000000000000000", + "end": "030000000000000000000000000000000000", + }, + ) + + log.info("Validating at workload end ...") + workload.validate(env.pageserver.id) + + # Stripe sizes in number of pages. TINY_STRIPES = 16 LARGE_STRIPES = 32768 From 770ac34ae6137bfb3c7dab9536a2943e209f21d0 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Tue, 19 Nov 2024 22:29:57 +0200 Subject: [PATCH 11/22] Register custom xlog reader callbacks for on-demand WAL download in StartupDecodingContext (#9007) ## Problem See https://github.com/neondatabase/neon/issues/8931 On-demand WAL download are not set in all cases where WAL is accessed by logical replication ## Summary of changes Set customer xlog reader handles in StartupDecodingContext Related changes in Postgres modules: https://github.com/neondatabase/postgres/pull/495 https://github.com/neondatabase/postgres/pull/496 https://github.com/neondatabase/postgres/pull/497 https://github.com/neondatabase/postgres/pull/498 ## Checklist before requesting a review - [ ] I have performed a self-review of my code. - [ ] If it is a core feature, I have added thorough tests. - [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard? - [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section. ## Checklist before merging - [ ] Do not forget to reformat commit message to not include the above checklist --------- Co-authored-by: Konstantin Knizhnik --- pgxn/neon/neon.c | 4 +-- .../regress/test_ondemand_wal_download.py | 27 +++++++++++++++++++ vendor/postgres-v14 | 2 +- vendor/postgres-v15 | 2 +- vendor/postgres-v16 | 2 +- vendor/postgres-v17 | 2 +- vendor/revisions.json | 8 +++--- 7 files changed, 36 insertions(+), 11 deletions(-) create mode 100644 test_runner/regress/test_ondemand_wal_download.py diff --git a/pgxn/neon/neon.c b/pgxn/neon/neon.c index f207ed61f9..51b9f58bbc 100644 --- a/pgxn/neon/neon.c +++ b/pgxn/neon/neon.c @@ -421,9 +421,7 @@ _PG_init(void) pg_init_libpagestore(); pg_init_walproposer(); - WalSender_Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines; - LogicalFuncs_Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines; - SlotFuncs_Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines; + Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines; InitUnstableExtensionsSupport(); InitLogicalReplicationMonitor(); diff --git a/test_runner/regress/test_ondemand_wal_download.py b/test_runner/regress/test_ondemand_wal_download.py new file mode 100644 index 0000000000..a7eb3e6625 --- /dev/null +++ b/test_runner/regress/test_ondemand_wal_download.py @@ -0,0 +1,27 @@ +from fixtures.neon_fixtures import NeonEnv + + +def test_on_demand_wal_download(neon_simple_env: NeonEnv): + env = neon_simple_env + ep = env.endpoints.create_start( + branch_name="main", + endpoint_id="primary", + config_lines=[ + "max_wal_size=32MB", + "min_wal_size=32MB", + "neon.logical_replication_max_snap_files=10000", + ], + ) + + con = ep.connect() + cur = con.cursor() + cur.execute("CREATE TABLE t(pk bigint primary key, payload text)") + cur.execute("ALTER TABLE t ALTER payload SET STORAGE external") + cur.execute("select pg_create_logical_replication_slot('myslot', 'test_decoding', false, true)") + cur.execute("insert into t values (generate_series(1,100000),repeat('?',10000))") + + ep.stop("fast") + ep.start() + con = ep.connect() + cur = con.cursor() + cur.execute("select pg_replication_slot_advance('myslot', pg_current_wal_insert_lsn())") diff --git a/vendor/postgres-v14 b/vendor/postgres-v14 index e54af35045..aeecd27b1f 160000 --- a/vendor/postgres-v14 +++ b/vendor/postgres-v14 @@ -1 +1 @@ -Subproject commit e54af3504513b1f44c0e0f68791a0d6d4210e948 +Subproject commit aeecd27b1f0775b606409d1cbb9c8aa9853a82af diff --git a/vendor/postgres-v15 b/vendor/postgres-v15 index 29bf1f04a5..544620db4c 160000 --- a/vendor/postgres-v15 +++ b/vendor/postgres-v15 @@ -1 +1 @@ -Subproject commit 29bf1f04a5628618b4c7972fed6f87065e3750ce +Subproject commit 544620db4ca6945be4f1f686a7fbd2cdfb0bf96f diff --git a/vendor/postgres-v16 b/vendor/postgres-v16 index b7e9ac3eb9..3cc152ae2d 160000 --- a/vendor/postgres-v16 +++ b/vendor/postgres-v16 @@ -1 +1 @@ -Subproject commit b7e9ac3eb9c5f43c443ebc76ddf06d5038c9bb34 +Subproject commit 3cc152ae2d17b19679c7102486bdb94677705c02 diff --git a/vendor/postgres-v17 b/vendor/postgres-v17 index a05dc1378d..e5d795a1a0 160000 --- a/vendor/postgres-v17 +++ b/vendor/postgres-v17 @@ -1 +1 @@ -Subproject commit a05dc1378dd822276dc99cb5e888f905d3527597 +Subproject commit e5d795a1a0c25da907176d37c905badab70e00c0 diff --git a/vendor/revisions.json b/vendor/revisions.json index 7243ba8716..a13ef29e45 100644 --- a/vendor/revisions.json +++ b/vendor/revisions.json @@ -1,18 +1,18 @@ { "v17": [ "17.2", - "a05dc1378dd822276dc99cb5e888f905d3527597" + "e5d795a1a0c25da907176d37c905badab70e00c0" ], "v16": [ "16.6", - "b7e9ac3eb9c5f43c443ebc76ddf06d5038c9bb34" + "3cc152ae2d17b19679c7102486bdb94677705c02" ], "v15": [ "15.10", - "29bf1f04a5628618b4c7972fed6f87065e3750ce" + "544620db4ca6945be4f1f686a7fbd2cdfb0bf96f" ], "v14": [ "14.15", - "e54af3504513b1f44c0e0f68791a0d6d4210e948" + "aeecd27b1f0775b606409d1cbb9c8aa9853a82af" ] } From 725e0a1ac9d1b409c57d2e7d87ac16d9c3d9f91b Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Tue, 19 Nov 2024 23:03:15 +0000 Subject: [PATCH 12/22] CI(release): create reusable workflow for releases (#9806) ## Problem We have a bunch of duplicated code for automated releases. There will be even more, once we have `release-compute` branch (https://github.com/neondatabase/neon/pull/9637). Another issue with the current `release` workflow is that it creates a PR from the main as is. If we create 2 different releases from the same commit, GitHub could mix up results from different PRs. ## Summary of changes - Create a reusable workflow for releases - Create an empty commit to differentiate releases --- .github/workflows/_create-release-pr.yml | 79 ++++++++++++++++++++++ .github/workflows/release.yml | 84 ++++-------------------- 2 files changed, 93 insertions(+), 70 deletions(-) create mode 100644 .github/workflows/_create-release-pr.yml diff --git a/.github/workflows/_create-release-pr.yml b/.github/workflows/_create-release-pr.yml new file mode 100644 index 0000000000..cc6994397f --- /dev/null +++ b/.github/workflows/_create-release-pr.yml @@ -0,0 +1,79 @@ +name: Create Release PR + +on: + workflow_call: + inputs: + component-name: + description: 'Component name' + required: true + type: string + release-branch: + description: 'Release branch' + required: true + type: string + secrets: + ci-access-token: + description: 'CI access token' + required: true + +defaults: + run: + shell: bash -euo pipefail {0} + +jobs: + create-storage-release-branch: + runs-on: ubuntu-22.04 + + permissions: + contents: write # for `git push` + + steps: + - uses: actions/checkout@v4 + with: + ref: main + + - name: Set variables + id: vars + env: + COMPONENT_NAME: ${{ inputs.component-name }} + RELEASE_BRANCH: ${{ inputs.release-branch }} + run: | + today=$(date +'%Y-%m-%d') + echo "title=${COMPONENT_NAME} release ${today}" | tee -a ${GITHUB_OUTPUT} + echo "rc-branch=rc/${RELEASE_BRANCH}/${today}" | tee -a ${GITHUB_OUTPUT} + + - name: Configure git + run: | + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + + - name: Create RC branch + env: + RC_BRANCH: ${{ steps.vars.outputs.rc-branch }} + TITLE: ${{ steps.vars.outputs.title }} + run: | + git checkout -b "${RC_BRANCH}" + + # create an empty commit to distinguish workflow runs + # from other possible releases from the same commit + git commit --allow-empty -m "${TITLE}" + + git push origin "${RC_BRANCH}" + + - name: Create a PR into ${{ inputs.release-branch }} + env: + GH_TOKEN: ${{ secrets.ci-access-token }} + RC_BRANCH: ${{ steps.vars.outputs.rc-branch }} + RELEASE_BRANCH: ${{ inputs.release-branch }} + TITLE: ${{ steps.vars.outputs.title }} + run: | + cat << EOF > body.md + ## ${TITLE} + + **Please merge this Pull Request using 'Create a merge commit' button** + EOF + + gh pr create --title "${TITLE}" \ + --body-file "body.md" \ + --head "${RC_BRANCH}" \ + --base "${RELEASE_BRANCH}" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 56ef6f4bbb..11f010b6d4 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -26,82 +26,26 @@ defaults: jobs: create-storage-release-branch: if: ${{ github.event.schedule == '0 6 * * MON' || format('{0}', inputs.create-storage-release-branch) == 'true' }} - runs-on: ubuntu-22.04 permissions: - contents: write # for `git push` + contents: write - steps: - - name: Check out code - uses: actions/checkout@v4 - with: - ref: main - - - name: Set environment variables - run: | - echo "RELEASE_DATE=$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV - echo "RELEASE_BRANCH=rc/$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV - - - name: Create release branch - run: git checkout -b $RELEASE_BRANCH - - - name: Push new branch - run: git push origin $RELEASE_BRANCH - - - name: Create pull request into release - env: - GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} - run: | - TITLE="Storage & Compute release ${RELEASE_DATE}" - - cat << EOF > body.md - ## ${TITLE} - - **Please merge this Pull Request using 'Create a merge commit' button** - EOF - - gh pr create --title "${TITLE}" \ - --body-file "body.md" \ - --head "${RELEASE_BRANCH}" \ - --base "release" + uses: ./.github/workflows/_create-release-pr.yml + with: + component-name: 'Storage & Compute' + release-branch: 'release' + secrets: + ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }} create-proxy-release-branch: if: ${{ github.event.schedule == '0 6 * * THU' || format('{0}', inputs.create-proxy-release-branch) == 'true' }} - runs-on: ubuntu-22.04 permissions: - contents: write # for `git push` + contents: write - steps: - - name: Check out code - uses: actions/checkout@v4 - with: - ref: main - - - name: Set environment variables - run: | - echo "RELEASE_DATE=$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV - echo "RELEASE_BRANCH=rc/proxy/$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV - - - name: Create release branch - run: git checkout -b $RELEASE_BRANCH - - - name: Push new branch - run: git push origin $RELEASE_BRANCH - - - name: Create pull request into release - env: - GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} - run: | - TITLE="Proxy release ${RELEASE_DATE}" - - cat << EOF > body.md - ## ${TITLE} - - **Please merge this Pull Request using 'Create a merge commit' button** - EOF - - gh pr create --title "${TITLE}" \ - --body-file "body.md" \ - --head "${RELEASE_BRANCH}" \ - --base "release-proxy" + uses: ./.github/workflows/_create-release-pr.yml + with: + component-name: 'Proxy' + release-branch: 'release-proxy' + secrets: + ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }} From 2281a02c49fd396ef9b06fafa35028c33eea8b3d Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Wed, 20 Nov 2024 00:30:24 +0000 Subject: [PATCH 13/22] CODEOWNERS: add developer-productivity team (#9810) Notify @neondatabase/developer-productivity team about changes in CI (i.e. in `.github/` directory) --- CODEOWNERS | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CODEOWNERS b/CODEOWNERS index f8ed4be816..21b0e7c51f 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,6 +1,5 @@ +/.github/ @neondatabase/developer-productivity /compute_tools/ @neondatabase/control-plane @neondatabase/compute -/storage_controller @neondatabase/storage -/storage_scrubber @neondatabase/storage /libs/pageserver_api/ @neondatabase/storage /libs/postgres_ffi/ @neondatabase/compute @neondatabase/storage /libs/remote_storage/ @neondatabase/storage @@ -11,4 +10,6 @@ /pgxn/neon/ @neondatabase/compute @neondatabase/storage /proxy/ @neondatabase/proxy /safekeeper/ @neondatabase/storage +/storage_controller @neondatabase/storage +/storage_scrubber @neondatabase/storage /vendor/ @neondatabase/compute From ea1858e3b66fa058ce8ddfb6f37b364154dd20a6 Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Wed, 20 Nov 2024 02:14:58 +0100 Subject: [PATCH 14/22] compute_ctl: Streamline and Pipeline startup SQL (#9717) Before, compute_ctl didn't have a good registry for what command would run when, depending exclusively on sync code to apply changes. When users have many databases/roles to manage, this step can take a substantial amount of time, breaking assumptions about low (re)start times in other systems. This commit reduces the time compute_ctl takes to restart when changes must be applied, by making all commands more or less blind writes, and applying these commands in an asynchronous context, only waiting for completion once we know the commands have all been sent. Additionally, this reduces time spent by batching per-database operations where previously we would create a new SQL connection for every user-database operation we planned to execute. --- compute_tools/src/catalog.rs | 44 +- compute_tools/src/checker.rs | 28 - compute_tools/src/compute.rs | 397 ++++++++-- compute_tools/src/lib.rs | 1 + compute_tools/src/pg_helpers.rs | 39 +- compute_tools/src/spec.rs | 634 +--------------- compute_tools/src/spec_apply.rs | 680 ++++++++++++++++++ .../src/sql/add_availabilitycheck_tables.sql | 18 + .../src/sql/anon_ext_fn_reassign.sql | 12 + compute_tools/src/sql/default_grants.sql | 30 + .../src/sql/set_public_schema_owner.sql | 23 + .../src/sql/unset_template_for_drop_dbs.sql | 12 + 12 files changed, 1146 insertions(+), 772 deletions(-) create mode 100644 compute_tools/src/spec_apply.rs create mode 100644 compute_tools/src/sql/add_availabilitycheck_tables.sql create mode 100644 compute_tools/src/sql/anon_ext_fn_reassign.sql create mode 100644 compute_tools/src/sql/default_grants.sql create mode 100644 compute_tools/src/sql/set_public_schema_owner.sql create mode 100644 compute_tools/src/sql/unset_template_for_drop_dbs.sql diff --git a/compute_tools/src/catalog.rs b/compute_tools/src/catalog.rs index 4fefa831e0..2f6f82dd39 100644 --- a/compute_tools/src/catalog.rs +++ b/compute_tools/src/catalog.rs @@ -1,38 +1,40 @@ -use compute_api::{ - responses::CatalogObjects, - spec::{Database, Role}, -}; +use compute_api::responses::CatalogObjects; use futures::Stream; -use postgres::{Client, NoTls}; +use postgres::NoTls; use std::{path::Path, process::Stdio, result::Result, sync::Arc}; use tokio::{ io::{AsyncBufReadExt, BufReader}, process::Command, - task, + spawn, }; +use tokio_postgres::connect; use tokio_stream::{self as stream, StreamExt}; use tokio_util::codec::{BytesCodec, FramedRead}; use tracing::warn; -use crate::{ - compute::ComputeNode, - pg_helpers::{get_existing_dbs, get_existing_roles}, -}; +use crate::compute::ComputeNode; +use crate::pg_helpers::{get_existing_dbs_async, get_existing_roles_async}; pub async fn get_dbs_and_roles(compute: &Arc) -> anyhow::Result { let connstr = compute.connstr.clone(); - task::spawn_blocking(move || { - let mut client = Client::connect(connstr.as_str(), NoTls)?; - let roles: Vec; - { - let mut xact = client.transaction()?; - roles = get_existing_roles(&mut xact)?; - } - let databases: Vec = get_existing_dbs(&mut client)?.values().cloned().collect(); - Ok(CatalogObjects { roles, databases }) - }) - .await? + let (client, connection): (tokio_postgres::Client, _) = + connect(connstr.as_str(), NoTls).await?; + + spawn(async move { + if let Err(e) = connection.await { + eprintln!("connection error: {}", e); + } + }); + + let roles = get_existing_roles_async(&client).await?; + + let databases = get_existing_dbs_async(&client) + .await? + .into_values() + .collect(); + + Ok(CatalogObjects { roles, databases }) } #[derive(Debug, thiserror::Error)] diff --git a/compute_tools/src/checker.rs b/compute_tools/src/checker.rs index d76eaad0a0..cec2b1bed8 100644 --- a/compute_tools/src/checker.rs +++ b/compute_tools/src/checker.rs @@ -1,37 +1,9 @@ use anyhow::{anyhow, Ok, Result}; -use postgres::Client; use tokio_postgres::NoTls; use tracing::{error, instrument, warn}; use crate::compute::ComputeNode; -/// Create a special service table for availability checks -/// only if it does not exist already. -pub fn create_availability_check_data(client: &mut Client) -> Result<()> { - let query = " - DO $$ - BEGIN - IF NOT EXISTS( - SELECT 1 - FROM pg_catalog.pg_tables - WHERE tablename = 'health_check' - ) - THEN - CREATE TABLE health_check ( - id serial primary key, - updated_at timestamptz default now() - ); - INSERT INTO health_check VALUES (1, now()) - ON CONFLICT (id) DO UPDATE - SET updated_at = now(); - END IF; - END - $$;"; - client.execute(query, &[])?; - - Ok(()) -} - /// Update timestamp in a row in a special service table to check /// that we can actually write some data in this particular timeline. #[instrument(skip_all)] diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 0a8cb14058..4f67425ba8 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -1,20 +1,21 @@ -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::env; use std::fs; +use std::iter::once; use std::os::unix::fs::{symlink, PermissionsExt}; use std::path::Path; use std::process::{Command, Stdio}; use std::str::FromStr; use std::sync::atomic::AtomicU32; use std::sync::atomic::Ordering; -use std::sync::{Condvar, Mutex, RwLock}; +use std::sync::{Arc, Condvar, Mutex, RwLock}; use std::thread; use std::time::Duration; use std::time::Instant; use anyhow::{Context, Result}; use chrono::{DateTime, Utc}; -use compute_api::spec::PgIdent; +use compute_api::spec::{PgIdent, Role}; use futures::future::join_all; use futures::stream::FuturesUnordered; use futures::StreamExt; @@ -31,15 +32,23 @@ use compute_api::spec::{ComputeFeature, ComputeMode, ComputeSpec, ExtVersion}; use utils::measured_stream::MeasuredReader; use nix::sys::signal::{kill, Signal}; - use remote_storage::{DownloadError, RemotePath}; +use tokio::spawn; +use url::Url; -use crate::checker::create_availability_check_data; use crate::installed_extensions::get_installed_extensions_sync; use crate::local_proxy; -use crate::logger::inlinify; use crate::pg_helpers::*; use crate::spec::*; +use crate::spec_apply::ApplySpecPhase::{ + CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSuperUser, + DropInvalidDatabases, DropRoles, HandleNeonExtension, HandleOtherExtensions, + RenameAndDeleteDatabases, RenameRoles, RunInEachDatabase, +}; +use crate::spec_apply::PerDatabasePhase::{ + ChangeSchemaPerms, DeleteDBRoleReferences, HandleAnonExtension, +}; +use crate::spec_apply::{apply_operations, MutableApplyContext, DB}; use crate::sync_sk::{check_if_synced, ping_safekeeper}; use crate::{config, extension_server}; @@ -224,10 +233,7 @@ fn maybe_cgexec(cmd: &str) -> Command { } } -/// Create special neon_superuser role, that's a slightly nerfed version of a real superuser -/// that we give to customers -#[instrument(skip_all)] -fn create_neon_superuser(spec: &ComputeSpec, client: &mut Client) -> Result<()> { +pub(crate) fn construct_superuser_query(spec: &ComputeSpec) -> String { let roles = spec .cluster .roles @@ -296,11 +302,8 @@ fn create_neon_superuser(spec: &ComputeSpec, client: &mut Client) -> Result<()> $$;"#, roles_decl, database_decl, ); - info!("Neon superuser created: {}", inlinify(&query)); - client - .simple_query(&query) - .map_err(|e| anyhow::anyhow!(e).context(query))?; - Ok(()) + + query } impl ComputeNode { @@ -813,21 +816,14 @@ impl ComputeNode { Ok(()) } - /// Do initial configuration of the already started Postgres. - #[instrument(skip_all)] - pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> { - // If connection fails, - // it may be the old node with `zenith_admin` superuser. - // - // In this case we need to connect with old `zenith_admin` name - // and create new user. We cannot simply rename connected user, - // but we can create a new one and grant it all privileges. - let mut connstr = self.connstr.clone(); + async fn get_maintenance_client(url: &Url) -> Result { + let mut connstr = url.clone(); + connstr .query_pairs_mut() .append_pair("application_name", "apply_config"); - let mut client = match Client::connect(connstr.as_str(), NoTls) { + let (client, conn) = match tokio_postgres::connect(connstr.as_str(), NoTls).await { Err(e) => match e.code() { Some(&SqlState::INVALID_PASSWORD) | Some(&SqlState::INVALID_AUTHORIZATION_SPECIFICATION) => { @@ -845,8 +841,8 @@ impl ComputeNode { let mut client = Client::connect(zenith_admin_connstr.as_str(), NoTls) .context("broken cloud_admin credential: tried connecting with cloud_admin but could not authenticate, and zenith_admin does not work either")?; - // Disable forwarding so that users don't get a cloud_admin role + // Disable forwarding so that users don't get a cloud_admin role let mut func = || { client.simple_query("SET neon.forward_ddl = false")?; client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?; @@ -858,49 +854,309 @@ impl ComputeNode { drop(client); // reconnect with connstring with expected name - Client::connect(connstr.as_str(), NoTls)? + tokio_postgres::connect(connstr.as_str(), NoTls).await? } _ => return Err(e.into()), }, - Ok(client) => client, + Ok((client, conn)) => (client, conn), }; - // Disable DDL forwarding because control plane already knows about these roles/databases. + spawn(async move { + if let Err(e) = conn.await { + error!("maintenance client connection error: {}", e); + } + }); + + // Disable DDL forwarding because control plane already knows about the roles/databases + // we're about to modify. client .simple_query("SET neon.forward_ddl = false") + .await .context("apply_config SET neon.forward_ddl = false")?; - // Proceed with post-startup configuration. Note, that order of operations is important. - let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec; - create_neon_superuser(spec, &mut client).context("apply_config create_neon_superuser")?; - cleanup_instance(&mut client).context("apply_config cleanup_instance")?; - handle_roles(spec, &mut client).context("apply_config handle_roles")?; - handle_databases(spec, &mut client).context("apply_config handle_databases")?; - handle_role_deletions(spec, connstr.as_str(), &mut client) - .context("apply_config handle_role_deletions")?; - handle_grants( - spec, - &mut client, - connstr.as_str(), - self.has_feature(ComputeFeature::AnonExtension), - ) - .context("apply_config handle_grants")?; - handle_extensions(spec, &mut client).context("apply_config handle_extensions")?; - handle_extension_neon(&mut client).context("apply_config handle_extension_neon")?; - create_availability_check_data(&mut client) - .context("apply_config create_availability_check_data")?; + Ok(client) + } - // 'Close' connection - drop(client); + /// Apply the spec to the running PostgreSQL instance. + /// The caller can decide to run with multiple clients in parallel, or + /// single mode. Either way, the commands executed will be the same, and + /// only commands run in different databases are parallelized. + #[instrument(skip_all)] + pub fn apply_spec_sql( + &self, + spec: Arc, + url: Arc, + concurrency: usize, + ) -> Result<()> { + let rt = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build()?; - if let Some(ref local_proxy) = spec.local_proxy_config { + info!("Applying config with max {} concurrency", concurrency); + debug!("Config: {:?}", spec); + + rt.block_on(async { + // Proceed with post-startup configuration. Note, that order of operations is important. + let client = Self::get_maintenance_client(&url).await?; + let spec = spec.clone(); + + let databases = get_existing_dbs_async(&client).await?; + let roles = get_existing_roles_async(&client) + .await? + .into_iter() + .map(|role| (role.name.clone(), role)) + .collect::>(); + + let jwks_roles = Arc::new( + spec.as_ref() + .local_proxy_config + .iter() + .flat_map(|it| &it.jwks) + .flatten() + .flat_map(|setting| &setting.role_names) + .cloned() + .collect::>(), + ); + + let ctx = Arc::new(tokio::sync::RwLock::new(MutableApplyContext { + roles, + dbs: databases, + })); + + for phase in [ + CreateSuperUser, + DropInvalidDatabases, + RenameRoles, + CreateAndAlterRoles, + RenameAndDeleteDatabases, + CreateAndAlterDatabases, + ] { + debug!("Applying phase {:?}", &phase); + apply_operations( + spec.clone(), + ctx.clone(), + jwks_roles.clone(), + phase, + || async { Ok(&client) }, + ) + .await?; + } + + let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency)); + + let db_processes = spec + .cluster + .databases + .iter() + .map(|db| DB::new(db.clone())) + // include + .chain(once(DB::SystemDB)) + .map(|db| { + let spec = spec.clone(); + let ctx = ctx.clone(); + let jwks_roles = jwks_roles.clone(); + let mut url = url.as_ref().clone(); + let concurrency_token = concurrency_token.clone(); + let db = db.clone(); + + debug!("Applying per-database phases for Database {:?}", &db); + + match &db { + DB::SystemDB => {} + DB::UserDB(db) => { + url.set_path(db.name.as_str()); + } + } + + let url = Arc::new(url); + let fut = Self::apply_spec_sql_db( + spec.clone(), + url, + ctx.clone(), + jwks_roles.clone(), + concurrency_token.clone(), + db, + ); + + Ok(spawn(fut)) + }) + .collect::>>(); + + for process in db_processes.into_iter() { + let handle = process?; + handle.await??; + } + + for phase in vec![ + HandleOtherExtensions, + HandleNeonExtension, + CreateAvailabilityCheck, + DropRoles, + ] { + debug!("Applying phase {:?}", &phase); + apply_operations( + spec.clone(), + ctx.clone(), + jwks_roles.clone(), + phase, + || async { Ok(&client) }, + ) + .await?; + } + + Ok::<(), anyhow::Error>(()) + })?; + + Ok(()) + } + + /// Apply SQL migrations of the RunInEachDatabase phase. + /// + /// May opt to not connect to databases that don't have any scheduled + /// operations. The function is concurrency-controlled with the provided + /// semaphore. The caller has to make sure the semaphore isn't exhausted. + async fn apply_spec_sql_db( + spec: Arc, + url: Arc, + ctx: Arc>, + jwks_roles: Arc>, + concurrency_token: Arc, + db: DB, + ) -> Result<()> { + let _permit = concurrency_token.acquire().await?; + + let mut client_conn = None; + + for subphase in [ + DeleteDBRoleReferences, + ChangeSchemaPerms, + HandleAnonExtension, + ] { + apply_operations( + spec.clone(), + ctx.clone(), + jwks_roles.clone(), + RunInEachDatabase { + db: db.clone(), + subphase, + }, + // Only connect if apply_operation actually wants a connection. + // It's quite possible this database doesn't need any queries, + // so by not connecting we save time and effort connecting to + // that database. + || async { + if client_conn.is_none() { + let db_client = Self::get_maintenance_client(&url).await?; + client_conn.replace(db_client); + } + let client = client_conn.as_ref().unwrap(); + Ok(client) + }, + ) + .await?; + } + + drop(client_conn); + + Ok::<(), anyhow::Error>(()) + } + + /// Do initial configuration of the already started Postgres. + #[instrument(skip_all)] + pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> { + // If connection fails, + // it may be the old node with `zenith_admin` superuser. + // + // In this case we need to connect with old `zenith_admin` name + // and create new user. We cannot simply rename connected user, + // but we can create a new one and grant it all privileges. + let mut url = self.connstr.clone(); + url.query_pairs_mut() + .append_pair("application_name", "apply_config"); + + let url = Arc::new(url); + let spec = Arc::new( + compute_state + .pspec + .as_ref() + .expect("spec must be set") + .spec + .clone(), + ); + + // Choose how many concurrent connections to use for applying the spec changes. + // If the cluster is not currently Running we don't have to deal with user connections, + // and can thus use all `max_connections` connection slots. However, that's generally not + // very efficient, so we generally still limit it to a smaller number. + let max_concurrent_connections = if compute_state.status != ComputeStatus::Running { + // If the settings contain 'max_connections', use that as template + if let Some(config) = spec.cluster.settings.find("max_connections") { + config.parse::().ok() + } else { + // Otherwise, try to find the setting in the postgresql_conf string + spec.cluster + .postgresql_conf + .iter() + .flat_map(|conf| conf.split("\n")) + .filter_map(|line| { + if !line.contains("max_connections") { + return None; + } + + let (key, value) = line.split_once("=")?; + let key = key + .trim_start_matches(char::is_whitespace) + .trim_end_matches(char::is_whitespace); + + let value = value + .trim_start_matches(char::is_whitespace) + .trim_end_matches(char::is_whitespace); + + if key != "max_connections" { + return None; + } + + value.parse::().ok() + }) + .next() + } + // If max_connections is present, use at most 1/3rd of that. + // When max_connections is lower than 30, try to use at least 10 connections, but + // never more than max_connections. + .map(|limit| match limit { + 0..10 => limit, + 10..30 => 10, + 30.. => limit / 3, + }) + // If we didn't find max_connections, default to 10 concurrent connections. + .unwrap_or(10) + } else { + // state == Running + // Because the cluster is already in the Running state, we should assume users are + // already connected to the cluster, and high concurrency could negatively + // impact user connectivity. Therefore, we can limit concurrency to the number of + // reserved superuser connections, which users wouldn't be able to use anyway. + spec.cluster + .settings + .find("superuser_reserved_connections") + .iter() + .filter_map(|val| val.parse::().ok()) + .map(|val| if val > 1 { val - 1 } else { 1 }) + .last() + .unwrap_or(3) + }; + + // Merge-apply spec & changes to PostgreSQL state. + self.apply_spec_sql(spec.clone(), url.clone(), max_concurrent_connections)?; + + if let Some(ref local_proxy) = &spec.clone().local_proxy_config { info!("configuring local_proxy"); local_proxy::configure(local_proxy).context("apply_config local_proxy")?; } // Run migrations separately to not hold up cold starts thread::spawn(move || { - let mut connstr = connstr.clone(); + let mut connstr = url.as_ref().clone(); connstr .query_pairs_mut() .append_pair("application_name", "migrations"); @@ -908,7 +1164,8 @@ impl ComputeNode { let mut client = Client::connect(connstr.as_str(), NoTls)?; handle_migrations(&mut client).context("apply_config handle_migrations") }); - Ok(()) + + Ok::<(), anyhow::Error>(()) } // Wrapped this around `pg_ctl reload`, but right now we don't use @@ -971,32 +1228,16 @@ impl ComputeNode { config::with_compute_ctl_tmp_override(pgdata_path, "neon.max_cluster_size=-1", || { self.pg_reload_conf()?; - let mut client = Client::connect(self.connstr.as_str(), NoTls)?; - - // Proceed with post-startup configuration. Note, that order of operations is important. - // Disable DDL forwarding because control plane already knows about these roles/databases. if spec.mode == ComputeMode::Primary { - client.simple_query("SET neon.forward_ddl = false")?; - cleanup_instance(&mut client)?; - handle_roles(&spec, &mut client)?; - handle_databases(&spec, &mut client)?; - handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?; - handle_grants( - &spec, - &mut client, - self.connstr.as_str(), - self.has_feature(ComputeFeature::AnonExtension), - )?; - handle_extensions(&spec, &mut client)?; - handle_extension_neon(&mut client)?; - // We can skip handle_migrations here because a new migration can only appear - // if we have a new version of the compute_ctl binary, which can only happen - // if compute got restarted, in which case we'll end up inside of apply_config - // instead of reconfigure. - } + let mut url = self.connstr.clone(); + url.query_pairs_mut() + .append_pair("application_name", "apply_config"); + let url = Arc::new(url); - // 'Close' connection - drop(client); + let spec = Arc::new(spec.clone()); + + self.apply_spec_sql(spec, url, 1)?; + } Ok(()) })?; diff --git a/compute_tools/src/lib.rs b/compute_tools/src/lib.rs index d27ae58fa2..ee4cf2dfa5 100644 --- a/compute_tools/src/lib.rs +++ b/compute_tools/src/lib.rs @@ -23,5 +23,6 @@ pub mod monitor; pub mod params; pub mod pg_helpers; pub mod spec; +mod spec_apply; pub mod swap; pub mod sync_sk; diff --git a/compute_tools/src/pg_helpers.rs b/compute_tools/src/pg_helpers.rs index b2dc265864..4a1e5ee0e8 100644 --- a/compute_tools/src/pg_helpers.rs +++ b/compute_tools/src/pg_helpers.rs @@ -10,9 +10,9 @@ use std::thread::JoinHandle; use std::time::{Duration, Instant}; use anyhow::{bail, Result}; +use futures::StreamExt; use ini::Ini; use notify::{RecursiveMode, Watcher}; -use postgres::{Client, Transaction}; use tokio::io::AsyncBufReadExt; use tokio::time::timeout; use tokio_postgres::NoTls; @@ -197,27 +197,34 @@ impl Escaping for PgIdent { } /// Build a list of existing Postgres roles -pub fn get_existing_roles(xact: &mut Transaction<'_>) -> Result> { - let postgres_roles = xact - .query("SELECT rolname, rolpassword FROM pg_catalog.pg_authid", &[])? - .iter() +pub async fn get_existing_roles_async(client: &tokio_postgres::Client) -> Result> { + let postgres_roles = client + .query_raw::( + "SELECT rolname, rolpassword FROM pg_catalog.pg_authid", + &[], + ) + .await? + .filter_map(|row| async { row.ok() }) .map(|row| Role { name: row.get("rolname"), encrypted_password: row.get("rolpassword"), options: None, }) - .collect(); + .collect() + .await; Ok(postgres_roles) } /// Build a list of existing Postgres databases -pub fn get_existing_dbs(client: &mut Client) -> Result> { +pub async fn get_existing_dbs_async( + client: &tokio_postgres::Client, +) -> Result> { // `pg_database.datconnlimit = -2` means that the database is in the // invalid state. See: // https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9 - let postgres_dbs: Vec = client - .query( + let rowstream = client + .query_raw::( "SELECT datname AS name, datdba::regrole::text AS owner, @@ -226,8 +233,11 @@ pub fn get_existing_dbs(client: &mut Client) -> Result FROM pg_catalog.pg_database;", &[], - )? - .iter() + ) + .await?; + + let dbs_map = rowstream + .filter_map(|r| async { r.ok() }) .map(|row| Database { name: row.get("name"), owner: row.get("owner"), @@ -235,12 +245,9 @@ pub fn get_existing_dbs(client: &mut Client) -> Result invalid: row.get("invalid"), options: None, }) - .collect(); - - let dbs_map = postgres_dbs - .iter() .map(|db| (db.name.clone(), db.clone())) - .collect::>(); + .collect::>() + .await; Ok(dbs_map) } diff --git a/compute_tools/src/spec.rs b/compute_tools/src/spec.rs index 73f3d1006a..c7d2deb090 100644 --- a/compute_tools/src/spec.rs +++ b/compute_tools/src/spec.rs @@ -1,22 +1,17 @@ -use std::collections::HashSet; +use anyhow::{anyhow, bail, Result}; +use postgres::Client; +use reqwest::StatusCode; use std::fs::File; use std::path::Path; -use std::str::FromStr; - -use anyhow::{anyhow, bail, Context, Result}; -use postgres::config::Config; -use postgres::{Client, NoTls}; -use reqwest::StatusCode; -use tracing::{error, info, info_span, instrument, span_enabled, warn, Level}; +use tracing::{error, info, instrument, warn}; use crate::config; -use crate::logger::inlinify; use crate::migration::MigrationRunner; use crate::params::PG_HBA_ALL_MD5; use crate::pg_helpers::*; use compute_api::responses::{ControlPlaneComputeStatus, ControlPlaneSpecResponse}; -use compute_api::spec::{ComputeSpec, PgIdent, Role}; +use compute_api::spec::ComputeSpec; // Do control plane request and return response if any. In case of error it // returns a bool flag indicating whether it makes sense to retry the request @@ -151,625 +146,6 @@ pub fn add_standby_signal(pgdata_path: &Path) -> Result<()> { Ok(()) } -/// Compute could be unexpectedly shut down, for example, during the -/// database dropping. This leaves the database in the invalid state, -/// which prevents new db creation with the same name. This function -/// will clean it up before proceeding with catalog updates. All -/// possible future cleanup operations may go here too. -#[instrument(skip_all)] -pub fn cleanup_instance(client: &mut Client) -> Result<()> { - let existing_dbs = get_existing_dbs(client)?; - - for (_, db) in existing_dbs { - if db.invalid { - // After recent commit in Postgres, interrupted DROP DATABASE - // leaves the database in the invalid state. According to the - // commit message, the only option for user is to drop it again. - // See: - // https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9 - // - // Postgres Neon extension is done the way, that db is de-registered - // in the control plane metadata only after it is dropped. So there is - // a chance that it still thinks that db should exist. This means - // that it will be re-created by `handle_databases()`. Yet, it's fine - // as user can just repeat drop (in vanilla Postgres they would need - // to do the same, btw). - let query = format!("DROP DATABASE IF EXISTS {}", db.name.pg_quote()); - info!("dropping invalid database {}", db.name); - client.execute(query.as_str(), &[])?; - } - } - - Ok(()) -} - -/// Given a cluster spec json and open transaction it handles roles creation, -/// deletion and update. -#[instrument(skip_all)] -pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> { - let mut xact = client.transaction()?; - let existing_roles: Vec = get_existing_roles(&mut xact)?; - - let mut jwks_roles = HashSet::new(); - if let Some(local_proxy) = &spec.local_proxy_config { - for jwks_setting in local_proxy.jwks.iter().flatten() { - for role_name in &jwks_setting.role_names { - jwks_roles.insert(role_name.clone()); - } - } - } - - // Print a list of existing Postgres roles (only in debug mode) - if span_enabled!(Level::INFO) { - let mut vec = Vec::new(); - for r in &existing_roles { - vec.push(format!( - "{}:{}", - r.name, - if r.encrypted_password.is_some() { - "[FILTERED]" - } else { - "(null)" - } - )); - } - - info!("postgres roles (total {}): {:?}", vec.len(), vec); - } - - // Process delta operations first - if let Some(ops) = &spec.delta_operations { - info!("processing role renames"); - for op in ops { - match op.action.as_ref() { - "delete_role" => { - // no-op now, roles will be deleted at the end of configuration - } - // Renaming role drops its password, since role name is - // used as a salt there. It is important that this role - // is recorded with a new `name` in the `roles` list. - // Follow up roles update will set the new password. - "rename_role" => { - let new_name = op.new_name.as_ref().unwrap(); - - // XXX: with a limited number of roles it is fine, but consider making it a HashMap - if existing_roles.iter().any(|r| r.name == op.name) { - let query: String = format!( - "ALTER ROLE {} RENAME TO {}", - op.name.pg_quote(), - new_name.pg_quote() - ); - - warn!("renaming role '{}' to '{}'", op.name, new_name); - xact.execute(query.as_str(), &[])?; - } - } - _ => {} - } - } - } - - // Refresh Postgres roles info to handle possible roles renaming - let existing_roles: Vec = get_existing_roles(&mut xact)?; - - info!( - "handling cluster spec roles (total {})", - spec.cluster.roles.len() - ); - for role in &spec.cluster.roles { - let name = &role.name; - // XXX: with a limited number of roles it is fine, but consider making it a HashMap - let pg_role = existing_roles.iter().find(|r| r.name == *name); - - enum RoleAction { - None, - Update, - Create, - } - let action = if let Some(r) = pg_role { - if (r.encrypted_password.is_none() && role.encrypted_password.is_some()) - || (r.encrypted_password.is_some() && role.encrypted_password.is_none()) - { - RoleAction::Update - } else if let Some(pg_pwd) = &r.encrypted_password { - // Check whether password changed or not (trim 'md5' prefix first if any) - // - // This is a backward compatibility hack, which comes from the times when we were using - // md5 for everyone and hashes were stored in the console db without md5 prefix. So when - // role comes from the control-plane (json spec) `Role.encrypted_password` doesn't have md5 prefix, - // but when role comes from Postgres (`get_existing_roles` / `existing_roles`) it has this prefix. - // Here is the only place so far where we compare hashes, so it seems to be the best candidate - // to place this compatibility layer. - let pg_pwd = if let Some(stripped) = pg_pwd.strip_prefix("md5") { - stripped - } else { - pg_pwd - }; - if pg_pwd != *role.encrypted_password.as_ref().unwrap() { - RoleAction::Update - } else { - RoleAction::None - } - } else { - RoleAction::None - } - } else { - RoleAction::Create - }; - - match action { - RoleAction::None => {} - RoleAction::Update => { - // This can be run on /every/ role! Not just ones created through the console. - // This means that if you add some funny ALTER here that adds a permission, - // this will get run even on user-created roles! This will result in different - // behavior before and after a spec gets reapplied. The below ALTER as it stands - // now only grants LOGIN and changes the password. Please do not allow this branch - // to do anything silly. - let mut query: String = format!("ALTER ROLE {} ", name.pg_quote()); - query.push_str(&role.to_pg_options()); - xact.execute(query.as_str(), &[])?; - } - RoleAction::Create => { - // This branch only runs when roles are created through the console, so it is - // safe to add more permissions here. BYPASSRLS and REPLICATION are inherited - // from neon_superuser. - let mut query: String = format!( - "CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser", - name.pg_quote() - ); - if jwks_roles.contains(name.as_str()) { - query = format!("CREATE ROLE {}", name.pg_quote()); - } - info!("running role create query: '{}'", &query); - query.push_str(&role.to_pg_options()); - xact.execute(query.as_str(), &[])?; - } - } - - if span_enabled!(Level::INFO) { - let pwd = if role.encrypted_password.is_some() { - "[FILTERED]" - } else { - "(null)" - }; - let action_str = match action { - RoleAction::None => "", - RoleAction::Create => " -> create", - RoleAction::Update => " -> update", - }; - info!(" - {}:{}{}", name, pwd, action_str); - } - } - - xact.commit()?; - - Ok(()) -} - -/// Reassign all dependent objects and delete requested roles. -#[instrument(skip_all)] -pub fn handle_role_deletions(spec: &ComputeSpec, connstr: &str, client: &mut Client) -> Result<()> { - if let Some(ops) = &spec.delta_operations { - // First, reassign all dependent objects to db owners. - info!("reassigning dependent objects of to-be-deleted roles"); - - // Fetch existing roles. We could've exported and used `existing_roles` from - // `handle_roles()`, but we only make this list there before creating new roles. - // Which is probably fine as we never create to-be-deleted roles, but that'd - // just look a bit untidy. Anyway, the entire `pg_roles` should be in shared - // buffers already, so this shouldn't be a big deal. - let mut xact = client.transaction()?; - let existing_roles: Vec = get_existing_roles(&mut xact)?; - xact.commit()?; - - for op in ops { - // Check that role is still present in Postgres, as this could be a - // restart with the same spec after role deletion. - if op.action == "delete_role" && existing_roles.iter().any(|r| r.name == op.name) { - reassign_owned_objects(spec, connstr, &op.name)?; - } - } - - // Second, proceed with role deletions. - info!("processing role deletions"); - let mut xact = client.transaction()?; - for op in ops { - // We do not check either role exists or not, - // Postgres will take care of it for us - if op.action == "delete_role" { - let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.pg_quote()); - - warn!("deleting role '{}'", &op.name); - xact.execute(query.as_str(), &[])?; - } - } - xact.commit()?; - } - - Ok(()) -} - -fn reassign_owned_objects_in_one_db( - conf: Config, - role_name: &PgIdent, - db_owner: &PgIdent, -) -> Result<()> { - let mut client = conf.connect(NoTls)?; - - // This will reassign all dependent objects to the db owner - let reassign_query = format!( - "REASSIGN OWNED BY {} TO {}", - role_name.pg_quote(), - db_owner.pg_quote() - ); - info!( - "reassigning objects owned by '{}' in db '{}' to '{}'", - role_name, - conf.get_dbname().unwrap_or(""), - db_owner - ); - client.simple_query(&reassign_query)?; - - // This now will only drop privileges of the role - let drop_query = format!("DROP OWNED BY {}", role_name.pg_quote()); - client.simple_query(&drop_query)?; - Ok(()) -} - -// Reassign all owned objects in all databases to the owner of the database. -fn reassign_owned_objects(spec: &ComputeSpec, connstr: &str, role_name: &PgIdent) -> Result<()> { - for db in &spec.cluster.databases { - if db.owner != *role_name { - let mut conf = Config::from_str(connstr)?; - conf.dbname(&db.name); - reassign_owned_objects_in_one_db(conf, role_name, &db.owner)?; - } - } - - // Also handle case when there are no databases in the spec. - // In this case we need to reassign objects in the default database. - let conf = Config::from_str(connstr)?; - let db_owner = PgIdent::from_str("cloud_admin")?; - reassign_owned_objects_in_one_db(conf, role_name, &db_owner)?; - - Ok(()) -} - -/// It follows mostly the same logic as `handle_roles()` excepting that we -/// does not use an explicit transactions block, since major database operations -/// like `CREATE DATABASE` and `DROP DATABASE` do not support it. Statement-level -/// atomicity should be enough here due to the order of operations and various checks, -/// which together provide us idempotency. -#[instrument(skip_all)] -pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> { - let existing_dbs = get_existing_dbs(client)?; - - // Print a list of existing Postgres databases (only in debug mode) - if span_enabled!(Level::INFO) { - let mut vec = Vec::new(); - for (dbname, db) in &existing_dbs { - vec.push(format!("{}:{}", dbname, db.owner)); - } - info!("postgres databases (total {}): {:?}", vec.len(), vec); - } - - // Process delta operations first - if let Some(ops) = &spec.delta_operations { - info!("processing delta operations on databases"); - for op in ops { - match op.action.as_ref() { - // We do not check either DB exists or not, - // Postgres will take care of it for us - "delete_db" => { - // In Postgres we can't drop a database if it is a template. - // So we need to unset the template flag first, but it could - // be a retry, so we could've already dropped the database. - // Check that database exists first to make it idempotent. - let unset_template_query: String = format!( - " - DO $$ - BEGIN - IF EXISTS( - SELECT 1 - FROM pg_catalog.pg_database - WHERE datname = {} - ) - THEN - ALTER DATABASE {} is_template false; - END IF; - END - $$;", - escape_literal(&op.name), - &op.name.pg_quote() - ); - // Use FORCE to drop database even if there are active connections. - // We run this from `cloud_admin`, so it should have enough privileges. - // NB: there could be other db states, which prevent us from dropping - // the database. For example, if db is used by any active subscription - // or replication slot. - // TODO: deal with it once we allow logical replication. Proper fix should - // involve returning an error code to the control plane, so it could - // figure out that this is a non-retryable error, return it to the user - // and fail operation permanently. - let drop_db_query: String = format!( - "DROP DATABASE IF EXISTS {} WITH (FORCE)", - &op.name.pg_quote() - ); - - warn!("deleting database '{}'", &op.name); - client.execute(unset_template_query.as_str(), &[])?; - client.execute(drop_db_query.as_str(), &[])?; - } - "rename_db" => { - let new_name = op.new_name.as_ref().unwrap(); - - if existing_dbs.contains_key(&op.name) { - let query: String = format!( - "ALTER DATABASE {} RENAME TO {}", - op.name.pg_quote(), - new_name.pg_quote() - ); - - warn!("renaming database '{}' to '{}'", op.name, new_name); - client.execute(query.as_str(), &[])?; - } - } - _ => {} - } - } - } - - // Refresh Postgres databases info to handle possible renames - let existing_dbs = get_existing_dbs(client)?; - - info!( - "handling cluster spec databases (total {})", - spec.cluster.databases.len() - ); - for db in &spec.cluster.databases { - let name = &db.name; - let pg_db = existing_dbs.get(name); - - enum DatabaseAction { - None, - Update, - Create, - } - let action = if let Some(r) = pg_db { - // XXX: db owner name is returned as quoted string from Postgres, - // when quoting is needed. - let new_owner = if r.owner.starts_with('"') { - db.owner.pg_quote() - } else { - db.owner.clone() - }; - - if new_owner != r.owner { - // Update the owner - DatabaseAction::Update - } else { - DatabaseAction::None - } - } else { - DatabaseAction::Create - }; - - match action { - DatabaseAction::None => {} - DatabaseAction::Update => { - let query: String = format!( - "ALTER DATABASE {} OWNER TO {}", - name.pg_quote(), - db.owner.pg_quote() - ); - let _guard = info_span!("executing", query).entered(); - client.execute(query.as_str(), &[])?; - } - DatabaseAction::Create => { - let mut query: String = format!("CREATE DATABASE {} ", name.pg_quote()); - query.push_str(&db.to_pg_options()); - let _guard = info_span!("executing", query).entered(); - client.execute(query.as_str(), &[])?; - let grant_query: String = format!( - "GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser", - name.pg_quote() - ); - client.execute(grant_query.as_str(), &[])?; - } - }; - - if span_enabled!(Level::INFO) { - let action_str = match action { - DatabaseAction::None => "", - DatabaseAction::Create => " -> create", - DatabaseAction::Update => " -> update", - }; - info!(" - {}:{}{}", db.name, db.owner, action_str); - } - } - - Ok(()) -} - -/// Grant CREATE ON DATABASE to the database owner and do some other alters and grants -/// to allow users creating trusted extensions and re-creating `public` schema, for example. -#[instrument(skip_all)] -pub fn handle_grants( - spec: &ComputeSpec, - client: &mut Client, - connstr: &str, - enable_anon_extension: bool, -) -> Result<()> { - info!("modifying database permissions"); - let existing_dbs = get_existing_dbs(client)?; - - // Do some per-database access adjustments. We'd better do this at db creation time, - // but CREATE DATABASE isn't transactional. So we cannot create db + do some grants - // atomically. - for db in &spec.cluster.databases { - match existing_dbs.get(&db.name) { - Some(pg_db) => { - if pg_db.restrict_conn || pg_db.invalid { - info!( - "skipping grants for db {} (invalid: {}, connections not allowed: {})", - db.name, pg_db.invalid, pg_db.restrict_conn - ); - continue; - } - } - None => { - bail!( - "database {} doesn't exist in Postgres after handle_databases()", - db.name - ); - } - } - - let mut conf = Config::from_str(connstr)?; - conf.dbname(&db.name); - - let mut db_client = conf.connect(NoTls)?; - - // This will only change ownership on the schema itself, not the objects - // inside it. Without it owner of the `public` schema will be `cloud_admin` - // and database owner cannot do anything with it. SQL procedure ensures - // that it won't error out if schema `public` doesn't exist. - let alter_query = format!( - "DO $$\n\ - DECLARE\n\ - schema_owner TEXT;\n\ - BEGIN\n\ - IF EXISTS(\n\ - SELECT nspname\n\ - FROM pg_catalog.pg_namespace\n\ - WHERE nspname = 'public'\n\ - )\n\ - THEN\n\ - SELECT nspowner::regrole::text\n\ - FROM pg_catalog.pg_namespace\n\ - WHERE nspname = 'public'\n\ - INTO schema_owner;\n\ - \n\ - IF schema_owner = 'cloud_admin' OR schema_owner = 'zenith_admin'\n\ - THEN\n\ - ALTER SCHEMA public OWNER TO {};\n\ - END IF;\n\ - END IF;\n\ - END\n\ - $$;", - db.owner.pg_quote() - ); - db_client.simple_query(&alter_query)?; - - // Explicitly grant CREATE ON SCHEMA PUBLIC to the web_access user. - // This is needed because since postgres 15 this privilege is removed by default. - // TODO: web_access isn't created for almost 1 year. It could be that we have - // active users of 1 year old projects, but hopefully not, so check it and - // remove this code if possible. The worst thing that could happen is that - // user won't be able to use public schema in NEW databases created in the - // very OLD project. - // - // Also, alter default permissions so that relations created by extensions can be - // used by neon_superuser without permission issues. - let grant_query = "DO $$\n\ - BEGIN\n\ - IF EXISTS(\n\ - SELECT nspname\n\ - FROM pg_catalog.pg_namespace\n\ - WHERE nspname = 'public'\n\ - ) AND\n\ - current_setting('server_version_num')::int/10000 >= 15\n\ - THEN\n\ - IF EXISTS(\n\ - SELECT rolname\n\ - FROM pg_catalog.pg_roles\n\ - WHERE rolname = 'web_access'\n\ - )\n\ - THEN\n\ - GRANT CREATE ON SCHEMA public TO web_access;\n\ - END IF;\n\ - END IF;\n\ - IF EXISTS(\n\ - SELECT nspname\n\ - FROM pg_catalog.pg_namespace\n\ - WHERE nspname = 'public'\n\ - )\n\ - THEN\n\ - ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;\n\ - ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION;\n\ - END IF;\n\ - END\n\ - $$;" - .to_string(); - - info!( - "grant query for db {} : {}", - &db.name, - inlinify(&grant_query) - ); - db_client.simple_query(&grant_query)?; - - // it is important to run this after all grants - if enable_anon_extension { - handle_extension_anon(spec, &db.owner, &mut db_client, false) - .context("handle_grants handle_extension_anon")?; - } - } - - Ok(()) -} - -/// Create required system extensions -#[instrument(skip_all)] -pub fn handle_extensions(spec: &ComputeSpec, client: &mut Client) -> Result<()> { - if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") { - if libs.contains("pg_stat_statements") { - // Create extension only if this compute really needs it - let query = "CREATE EXTENSION IF NOT EXISTS pg_stat_statements"; - info!("creating system extensions with query: {}", query); - client.simple_query(query)?; - } - } - - Ok(()) -} - -/// Run CREATE and ALTER EXTENSION neon UPDATE for postgres database -#[instrument(skip_all)] -pub fn handle_extension_neon(client: &mut Client) -> Result<()> { - info!("handle extension neon"); - - let mut query = "CREATE SCHEMA IF NOT EXISTS neon"; - client.simple_query(query)?; - - query = "CREATE EXTENSION IF NOT EXISTS neon WITH SCHEMA neon"; - info!("create neon extension with query: {}", query); - client.simple_query(query)?; - - query = "UPDATE pg_extension SET extrelocatable = true WHERE extname = 'neon'"; - client.simple_query(query)?; - - query = "ALTER EXTENSION neon SET SCHEMA neon"; - info!("alter neon extension schema with query: {}", query); - client.simple_query(query)?; - - // this will be a no-op if extension is already up to date, - // which may happen in two cases: - // - extension was just installed - // - extension was already installed and is up to date - let query = "ALTER EXTENSION neon UPDATE"; - info!("update neon extension version with query: {}", query); - if let Err(e) = client.simple_query(query) { - error!( - "failed to upgrade neon extension during `handle_extension_neon`: {}", - e - ); - } - - Ok(()) -} - #[instrument(skip_all)] pub fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> { info!("handle neon extension upgrade"); diff --git a/compute_tools/src/spec_apply.rs b/compute_tools/src/spec_apply.rs new file mode 100644 index 0000000000..7308d5d36e --- /dev/null +++ b/compute_tools/src/spec_apply.rs @@ -0,0 +1,680 @@ +use std::collections::{HashMap, HashSet}; +use std::fmt::{Debug, Formatter}; +use std::future::Future; +use std::iter::empty; +use std::iter::once; +use std::sync::Arc; + +use crate::compute::construct_superuser_query; +use crate::pg_helpers::{escape_literal, DatabaseExt, Escaping, GenericOptionsSearch, RoleExt}; +use anyhow::{bail, Result}; +use compute_api::spec::{ComputeFeature, ComputeSpec, Database, PgIdent, Role}; +use futures::future::join_all; +use tokio::sync::RwLock; +use tokio_postgres::Client; +use tracing::{debug, info_span, Instrument}; + +#[derive(Clone)] +pub enum DB { + SystemDB, + UserDB(Database), +} + +impl DB { + pub fn new(db: Database) -> DB { + Self::UserDB(db) + } + + pub fn is_owned_by(&self, role: &PgIdent) -> bool { + match self { + DB::SystemDB => false, + DB::UserDB(db) => &db.owner == role, + } + } +} + +impl Debug for DB { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + DB::SystemDB => f.debug_tuple("SystemDB").finish(), + DB::UserDB(db) => f.debug_tuple("UserDB").field(&db.name).finish(), + } + } +} + +#[derive(Copy, Clone, Debug)] +pub enum PerDatabasePhase { + DeleteDBRoleReferences, + ChangeSchemaPerms, + HandleAnonExtension, +} + +#[derive(Clone, Debug)] +pub enum ApplySpecPhase { + CreateSuperUser, + DropInvalidDatabases, + RenameRoles, + CreateAndAlterRoles, + RenameAndDeleteDatabases, + CreateAndAlterDatabases, + RunInEachDatabase { db: DB, subphase: PerDatabasePhase }, + HandleOtherExtensions, + HandleNeonExtension, + CreateAvailabilityCheck, + DropRoles, +} + +pub struct Operation { + pub query: String, + pub comment: Option, +} + +pub struct MutableApplyContext { + pub roles: HashMap, + pub dbs: HashMap, +} + +/// Appply the operations that belong to the given spec apply phase. +/// +/// Commands within a single phase are executed in order of Iterator yield. +/// Commands of ApplySpecPhase::RunInEachDatabase will execute in the database +/// indicated by its `db` field, and can share a single client for all changes +/// to that database. +/// +/// Notes: +/// - Commands are pipelined, and thus may cause incomplete apply if one +/// command of many fails. +/// - Failing commands will fail the phase's apply step once the return value +/// is processed. +/// - No timeouts have (yet) been implemented. +/// - The caller is responsible for limiting and/or applying concurrency. +pub async fn apply_operations<'a, Fut, F>( + spec: Arc, + ctx: Arc>, + jwks_roles: Arc>, + apply_spec_phase: ApplySpecPhase, + client: F, +) -> Result<()> +where + F: FnOnce() -> Fut, + Fut: Future>, +{ + debug!("Starting phase {:?}", &apply_spec_phase); + let span = info_span!("db_apply_changes", phase=?apply_spec_phase); + let span2 = span.clone(); + async move { + debug!("Processing phase {:?}", &apply_spec_phase); + let ctx = ctx; + + let mut ops = get_operations(&spec, &ctx, &jwks_roles, &apply_spec_phase) + .await? + .peekable(); + + // Return (and by doing so, skip requesting the PostgreSQL client) if + // we don't have any operations scheduled. + if ops.peek().is_none() { + return Ok(()); + } + + let client = client().await?; + + debug!("Applying phase {:?}", &apply_spec_phase); + + let active_queries = ops + .map(|op| { + let Operation { comment, query } = op; + let inspan = match comment { + None => span.clone(), + Some(comment) => info_span!("phase {}: {}", comment), + }; + + async { + let query = query; + let res = client.simple_query(&query).await; + debug!( + "{} {}", + if res.is_ok() { + "successfully executed" + } else { + "failed to execute" + }, + query + ); + res + } + .instrument(inspan) + }) + .collect::>(); + + drop(ctx); + + for it in join_all(active_queries).await { + drop(it?); + } + + debug!("Completed phase {:?}", &apply_spec_phase); + + Ok(()) + } + .instrument(span2) + .await +} + +/// Create a stream of operations to be executed for that phase of applying +/// changes. +/// +/// In the future we may generate a single stream of changes and then +/// sort/merge/batch execution, but for now this is a nice way to improve +/// batching behaviour of the commands. +async fn get_operations<'a>( + spec: &'a ComputeSpec, + ctx: &'a RwLock, + jwks_roles: &'a HashSet, + apply_spec_phase: &'a ApplySpecPhase, +) -> Result + 'a + Send>> { + match apply_spec_phase { + ApplySpecPhase::CreateSuperUser => { + let query = construct_superuser_query(spec); + + Ok(Box::new(once(Operation { + query, + comment: None, + }))) + } + ApplySpecPhase::DropInvalidDatabases => { + let mut ctx = ctx.write().await; + let databases = &mut ctx.dbs; + + let keys: Vec<_> = databases + .iter() + .filter(|(_, db)| db.invalid) + .map(|(dbname, _)| dbname.clone()) + .collect(); + + // After recent commit in Postgres, interrupted DROP DATABASE + // leaves the database in the invalid state. According to the + // commit message, the only option for user is to drop it again. + // See: + // https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9 + // + // Postgres Neon extension is done the way, that db is de-registered + // in the control plane metadata only after it is dropped. So there is + // a chance that it still thinks that the db should exist. This means + // that it will be re-created by the `CreateDatabases` phase. This + // is fine, as user can just drop the table again (in vanilla + // Postgres they would need to do the same). + let operations = keys + .into_iter() + .filter_map(move |dbname| ctx.dbs.remove(&dbname)) + .map(|db| Operation { + query: format!("DROP DATABASE IF EXISTS {}", db.name.pg_quote()), + comment: Some(format!("Dropping invalid database {}", db.name)), + }); + + Ok(Box::new(operations)) + } + ApplySpecPhase::RenameRoles => { + let mut ctx = ctx.write().await; + + let operations = spec + .delta_operations + .iter() + .flatten() + .filter(|op| op.action == "rename_role") + .filter_map(move |op| { + let roles = &mut ctx.roles; + + if roles.contains_key(op.name.as_str()) { + None + } else { + let new_name = op.new_name.as_ref().unwrap(); + let mut role = roles.remove(op.name.as_str()).unwrap(); + + role.name = new_name.clone(); + role.encrypted_password = None; + roles.insert(role.name.clone(), role); + + Some(Operation { + query: format!( + "ALTER ROLE {} RENAME TO {}", + op.name.pg_quote(), + new_name.pg_quote() + ), + comment: Some(format!("renaming role '{}' to '{}'", op.name, new_name)), + }) + } + }); + + Ok(Box::new(operations)) + } + ApplySpecPhase::CreateAndAlterRoles => { + let mut ctx = ctx.write().await; + + let operations = spec.cluster.roles + .iter() + .filter_map(move |role| { + let roles = &mut ctx.roles; + let db_role = roles.get(&role.name); + + match db_role { + Some(db_role) => { + if db_role.encrypted_password != role.encrypted_password { + // This can be run on /every/ role! Not just ones created through the console. + // This means that if you add some funny ALTER here that adds a permission, + // this will get run even on user-created roles! This will result in different + // behavior before and after a spec gets reapplied. The below ALTER as it stands + // now only grants LOGIN and changes the password. Please do not allow this branch + // to do anything silly. + Some(Operation { + query: format!( + "ALTER ROLE {} {}", + role.name.pg_quote(), + role.to_pg_options(), + ), + comment: None, + }) + } else { + None + } + } + None => { + let query = if !jwks_roles.contains(role.name.as_str()) { + format!( + "CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser {}", + role.name.pg_quote(), + role.to_pg_options(), + ) + } else { + format!( + "CREATE ROLE {} {}", + role.name.pg_quote(), + role.to_pg_options(), + ) + }; + Some(Operation { + query, + comment: Some(format!("creating role {}", role.name)), + }) + } + } + }); + + Ok(Box::new(operations)) + } + ApplySpecPhase::RenameAndDeleteDatabases => { + let mut ctx = ctx.write().await; + + let operations = spec + .delta_operations + .iter() + .flatten() + .filter_map(move |op| { + let databases = &mut ctx.dbs; + match op.action.as_str() { + // We do not check whether the DB exists or not, + // Postgres will take care of it for us + "delete_db" => { + // In Postgres we can't drop a database if it is a template. + // So we need to unset the template flag first, but it could + // be a retry, so we could've already dropped the database. + // Check that database exists first to make it idempotent. + let unset_template_query: String = format!( + include_str!("sql/unset_template_for_drop_dbs.sql"), + datname_str = escape_literal(&op.name), + datname = &op.name.pg_quote() + ); + + // Use FORCE to drop database even if there are active connections. + // We run this from `cloud_admin`, so it should have enough privileges. + // NB: there could be other db states, which prevent us from dropping + // the database. For example, if db is used by any active subscription + // or replication slot. + // TODO: deal with it once we allow logical replication. Proper fix should + // involve returning an error code to the control plane, so it could + // figure out that this is a non-retryable error, return it to the user + // and fail operation permanently. + let drop_db_query: String = format!( + "DROP DATABASE IF EXISTS {} WITH (FORCE)", + &op.name.pg_quote() + ); + + databases.remove(&op.name); + + Some(vec![ + Operation { + query: unset_template_query, + comment: Some(format!( + "optionally clearing template flags for DB {}", + op.name, + )), + }, + Operation { + query: drop_db_query, + comment: Some(format!("deleting database {}", op.name,)), + }, + ]) + } + "rename_db" => { + if let Some(mut db) = databases.remove(&op.name) { + // update state of known databases + let new_name = op.new_name.as_ref().unwrap(); + db.name = new_name.clone(); + databases.insert(db.name.clone(), db); + + Some(vec![Operation { + query: format!( + "ALTER DATABASE {} RENAME TO {}", + op.name.pg_quote(), + new_name.pg_quote(), + ), + comment: Some(format!( + "renaming database '{}' to '{}'", + op.name, new_name + )), + }]) + } else { + None + } + } + _ => None, + } + }) + .flatten(); + + Ok(Box::new(operations)) + } + ApplySpecPhase::CreateAndAlterDatabases => { + let mut ctx = ctx.write().await; + + let operations = spec + .cluster + .databases + .iter() + .filter_map(move |db| { + let databases = &mut ctx.dbs; + if let Some(edb) = databases.get_mut(&db.name) { + let change_owner = if edb.owner.starts_with('"') { + db.owner.pg_quote() != edb.owner + } else { + db.owner != edb.owner + }; + + edb.owner = db.owner.clone(); + + if change_owner { + Some(vec![Operation { + query: format!( + "ALTER DATABASE {} OWNER TO {}", + db.name.pg_quote(), + db.owner.pg_quote() + ), + comment: Some(format!( + "changing database owner of database {} to {}", + db.name, db.owner + )), + }]) + } else { + None + } + } else { + databases.insert(db.name.clone(), db.clone()); + + Some(vec![ + Operation { + query: format!( + "CREATE DATABASE {} {}", + db.name.pg_quote(), + db.to_pg_options(), + ), + comment: None, + }, + Operation { + query: format!( + "GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser", + db.name.pg_quote() + ), + comment: None, + }, + ]) + } + }) + .flatten(); + + Ok(Box::new(operations)) + } + ApplySpecPhase::RunInEachDatabase { db, subphase } => { + match subphase { + PerDatabasePhase::DeleteDBRoleReferences => { + let ctx = ctx.read().await; + + let operations = + spec.delta_operations + .iter() + .flatten() + .filter(|op| op.action == "delete_role") + .filter_map(move |op| { + if db.is_owned_by(&op.name) { + return None; + } + if !ctx.roles.contains_key(&op.name) { + return None; + } + let quoted = op.name.pg_quote(); + let new_owner = match &db { + DB::SystemDB => PgIdent::from("cloud_admin").pg_quote(), + DB::UserDB(db) => db.owner.pg_quote(), + }; + + Some(vec![ + // This will reassign all dependent objects to the db owner + Operation { + query: format!( + "REASSIGN OWNED BY {} TO {}", + quoted, new_owner, + ), + comment: None, + }, + // This now will only drop privileges of the role + Operation { + query: format!("DROP OWNED BY {}", quoted), + comment: None, + }, + ]) + }) + .flatten(); + + Ok(Box::new(operations)) + } + PerDatabasePhase::ChangeSchemaPerms => { + let ctx = ctx.read().await; + let databases = &ctx.dbs; + + let db = match &db { + // ignore schema permissions on the system database + DB::SystemDB => return Ok(Box::new(empty())), + DB::UserDB(db) => db, + }; + + if databases.get(&db.name).is_none() { + bail!("database {} doesn't exist in PostgreSQL", db.name); + } + + let edb = databases.get(&db.name).unwrap(); + + if edb.restrict_conn || edb.invalid { + return Ok(Box::new(empty())); + } + + let operations = vec![ + Operation { + query: format!( + include_str!("sql/set_public_schema_owner.sql"), + db_owner = db.owner.pg_quote() + ), + comment: None, + }, + Operation { + query: String::from(include_str!("sql/default_grants.sql")), + comment: None, + }, + ] + .into_iter(); + + Ok(Box::new(operations)) + } + PerDatabasePhase::HandleAnonExtension => { + // Only install Anon into user databases + let db = match &db { + DB::SystemDB => return Ok(Box::new(empty())), + DB::UserDB(db) => db, + }; + // Never install Anon when it's not enabled as feature + if !spec.features.contains(&ComputeFeature::AnonExtension) { + return Ok(Box::new(empty())); + } + + // Only install Anon when it's added in preload libraries + let opt_libs = spec.cluster.settings.find("shared_preload_libraries"); + + let libs = match opt_libs { + Some(libs) => libs, + None => return Ok(Box::new(empty())), + }; + + if !libs.contains("anon") { + return Ok(Box::new(empty())); + } + + let db_owner = db.owner.pg_quote(); + + let operations = vec![ + // Create anon extension if this compute needs it + // Users cannot create it themselves, because superuser is required. + Operation { + query: String::from("CREATE EXTENSION IF NOT EXISTS anon CASCADE"), + comment: Some(String::from("creating anon extension")), + }, + // Initialize anon extension + // This also requires superuser privileges, so users cannot do it themselves. + Operation { + query: String::from("SELECT anon.init()"), + comment: Some(String::from("initializing anon extension data")), + }, + Operation { + query: format!("GRANT ALL ON SCHEMA anon TO {}", db_owner), + comment: Some(String::from( + "granting anon extension schema permissions", + )), + }, + Operation { + query: format!( + "GRANT ALL ON ALL FUNCTIONS IN SCHEMA anon TO {}", + db_owner + ), + comment: Some(String::from( + "granting anon extension schema functions permissions", + )), + }, + // We need this, because some functions are defined as SECURITY DEFINER. + // In Postgres SECURITY DEFINER functions are executed with the privileges + // of the owner. + // In anon extension this it is needed to access some GUCs, which are only accessible to + // superuser. But we've patched postgres to allow db_owner to access them as well. + // So we need to change owner of these functions to db_owner. + Operation { + query: format!( + include_str!("sql/anon_ext_fn_reassign.sql"), + db_owner = db_owner, + ), + comment: Some(String::from( + "change anon extension functions owner to database_owner", + )), + }, + Operation { + query: format!( + "GRANT ALL ON ALL TABLES IN SCHEMA anon TO {}", + db_owner, + ), + comment: Some(String::from( + "granting anon extension tables permissions", + )), + }, + Operation { + query: format!( + "GRANT ALL ON ALL SEQUENCES IN SCHEMA anon TO {}", + db_owner, + ), + comment: Some(String::from( + "granting anon extension sequences permissions", + )), + }, + ] + .into_iter(); + + Ok(Box::new(operations)) + } + } + } + // Interestingly, we only install p_s_s in the main database, even when + // it's preloaded. + ApplySpecPhase::HandleOtherExtensions => { + if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") { + if libs.contains("pg_stat_statements") { + return Ok(Box::new(once(Operation { + query: String::from("CREATE EXTENSION IF NOT EXISTS pg_stat_statements"), + comment: Some(String::from("create system extensions")), + }))); + } + } + Ok(Box::new(empty())) + } + ApplySpecPhase::HandleNeonExtension => { + let operations = vec![ + Operation { + query: String::from("CREATE SCHEMA IF NOT EXISTS neon"), + comment: Some(String::from("init: add schema for extension")), + }, + Operation { + query: String::from("CREATE EXTENSION IF NOT EXISTS neon WITH SCHEMA neon"), + comment: Some(String::from( + "init: install the extension if not already installed", + )), + }, + Operation { + query: String::from( + "UPDATE pg_extension SET extrelocatable = true WHERE extname = 'neon'", + ), + comment: Some(String::from("compat/fix: make neon relocatable")), + }, + Operation { + query: String::from("ALTER EXTENSION neon SET SCHEMA neon"), + comment: Some(String::from("compat/fix: alter neon extension schema")), + }, + Operation { + query: String::from("ALTER EXTENSION neon UPDATE"), + comment: Some(String::from("compat/update: update neon extension version")), + }, + ] + .into_iter(); + + Ok(Box::new(operations)) + } + ApplySpecPhase::CreateAvailabilityCheck => Ok(Box::new(once(Operation { + query: String::from(include_str!("sql/add_availabilitycheck_tables.sql")), + comment: None, + }))), + ApplySpecPhase::DropRoles => { + let operations = spec + .delta_operations + .iter() + .flatten() + .filter(|op| op.action == "delete_role") + .map(|op| Operation { + query: format!("DROP ROLE IF EXISTS {}", op.name.pg_quote()), + comment: None, + }); + + Ok(Box::new(operations)) + } + } +} diff --git a/compute_tools/src/sql/add_availabilitycheck_tables.sql b/compute_tools/src/sql/add_availabilitycheck_tables.sql new file mode 100644 index 0000000000..7c60690c78 --- /dev/null +++ b/compute_tools/src/sql/add_availabilitycheck_tables.sql @@ -0,0 +1,18 @@ +DO $$ +BEGIN + IF NOT EXISTS( + SELECT 1 + FROM pg_catalog.pg_tables + WHERE tablename = 'health_check' + ) + THEN + CREATE TABLE health_check ( + id serial primary key, + updated_at timestamptz default now() + ); + INSERT INTO health_check VALUES (1, now()) + ON CONFLICT (id) DO UPDATE + SET updated_at = now(); + END IF; +END +$$ \ No newline at end of file diff --git a/compute_tools/src/sql/anon_ext_fn_reassign.sql b/compute_tools/src/sql/anon_ext_fn_reassign.sql new file mode 100644 index 0000000000..3d7b15c590 --- /dev/null +++ b/compute_tools/src/sql/anon_ext_fn_reassign.sql @@ -0,0 +1,12 @@ +DO $$ +DECLARE + query varchar; +BEGIN + FOR query IN SELECT 'ALTER FUNCTION '||nsp.nspname||'.'||p.proname||'('||pg_get_function_identity_arguments(p.oid)||') OWNER TO {db_owner};' + FROM pg_proc p + JOIN pg_namespace nsp ON p.pronamespace = nsp.oid + WHERE nsp.nspname = 'anon' LOOP + EXECUTE query; + END LOOP; +END +$$; diff --git a/compute_tools/src/sql/default_grants.sql b/compute_tools/src/sql/default_grants.sql new file mode 100644 index 0000000000..58ebb0690b --- /dev/null +++ b/compute_tools/src/sql/default_grants.sql @@ -0,0 +1,30 @@ +DO +$$ + BEGIN + IF EXISTS( + SELECT nspname + FROM pg_catalog.pg_namespace + WHERE nspname = 'public' + ) AND + current_setting('server_version_num')::int / 10000 >= 15 + THEN + IF EXISTS( + SELECT rolname + FROM pg_catalog.pg_roles + WHERE rolname = 'web_access' + ) + THEN + GRANT CREATE ON SCHEMA public TO web_access; + END IF; + END IF; + IF EXISTS( + SELECT nspname + FROM pg_catalog.pg_namespace + WHERE nspname = 'public' + ) + THEN + ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION; + ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION; + END IF; + END +$$; \ No newline at end of file diff --git a/compute_tools/src/sql/set_public_schema_owner.sql b/compute_tools/src/sql/set_public_schema_owner.sql new file mode 100644 index 0000000000..fd061a713e --- /dev/null +++ b/compute_tools/src/sql/set_public_schema_owner.sql @@ -0,0 +1,23 @@ +DO +$$ + DECLARE + schema_owner TEXT; + BEGIN + IF EXISTS( + SELECT nspname + FROM pg_catalog.pg_namespace + WHERE nspname = 'public' + ) + THEN + SELECT nspowner::regrole::text + FROM pg_catalog.pg_namespace + WHERE nspname = 'public' + INTO schema_owner; + + IF schema_owner = 'cloud_admin' OR schema_owner = 'zenith_admin' + THEN + ALTER SCHEMA public OWNER TO {db_owner}; + END IF; + END IF; + END +$$; \ No newline at end of file diff --git a/compute_tools/src/sql/unset_template_for_drop_dbs.sql b/compute_tools/src/sql/unset_template_for_drop_dbs.sql new file mode 100644 index 0000000000..6c4343a589 --- /dev/null +++ b/compute_tools/src/sql/unset_template_for_drop_dbs.sql @@ -0,0 +1,12 @@ +DO $$ + BEGIN + IF EXISTS( + SELECT 1 + FROM pg_catalog.pg_database + WHERE datname = {datname_str} + ) + THEN + ALTER DATABASE {datname} is_template false; + END IF; + END +$$; \ No newline at end of file From 0a499a317614a049bab4e1166984557789460793 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Wed, 20 Nov 2024 06:44:23 +0100 Subject: [PATCH 15/22] Don't preload offloaded timelines (#9646) In timeline preloading, we also do a preload for offloaded timelines. This includes the download of `index-part.json`. Ultimately, such a download is wasteful, therefore avoid it. Same goes for the remote client, we just discard it immediately thereafter. Part of #8088 --------- Co-authored-by: Christian Schwarz --- pageserver/src/tenant.rs | 71 +++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 37bf83c984..8e9e3890ba 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -249,7 +249,8 @@ struct TimelinePreload { pub(crate) struct TenantPreload { tenant_manifest: TenantManifest, - timelines: HashMap, + /// Map from timeline ID to a possible timeline preload. It is None iff the timeline is offloaded according to the manifest. + timelines: HashMap>, } /// When we spawn a tenant, there is a special mode for tenant creation that @@ -1397,7 +1398,7 @@ impl Tenant { // Get list of remote timelines // download index files for every tenant timeline info!("listing remote timelines"); - let (remote_timeline_ids, other_keys) = remote_timeline_client::list_remote_timelines( + let (mut remote_timeline_ids, other_keys) = remote_timeline_client::list_remote_timelines( remote_storage, self.tenant_shard_id, cancel.clone(), @@ -1431,11 +1432,27 @@ impl Tenant { warn!("Unexpected non timeline key {k}"); } + // Avoid downloading IndexPart of offloaded timelines. + let mut offloaded_with_prefix = HashSet::new(); + for offloaded in tenant_manifest.offloaded_timelines.iter() { + if remote_timeline_ids.remove(&offloaded.timeline_id) { + offloaded_with_prefix.insert(offloaded.timeline_id); + } else { + // We'll take care later of timelines in the manifest without a prefix + } + } + + let timelines = self + .load_timelines_metadata(remote_timeline_ids, remote_storage, cancel) + .await?; + Ok(TenantPreload { tenant_manifest, - timelines: self - .load_timelines_metadata(remote_timeline_ids, remote_storage, cancel) - .await?, + timelines: timelines + .into_iter() + .map(|(id, tl)| (id, Some(tl))) + .chain(offloaded_with_prefix.into_iter().map(|id| (id, None))) + .collect(), }) } @@ -1466,6 +1483,19 @@ impl Tenant { offloaded_timelines_list.push((timeline_id, Arc::new(offloaded_timeline))); offloaded_timeline_ids.insert(timeline_id); } + // Complete deletions for offloaded timeline id's from manifest. + // The manifest will be uploaded later in this function. + offloaded_timelines_list + .retain(|(offloaded_id, offloaded)| { + // Existence of a timeline is finally determined by the existence of an index-part.json in remote storage. + // If there is dangling references in another location, they need to be cleaned up. + let delete = !preload.timelines.contains_key(offloaded_id); + if delete { + tracing::info!("Removing offloaded timeline {offloaded_id} from manifest as no remote prefix was found"); + offloaded.defuse_for_tenant_drop(); + } + !delete + }); let mut timelines_to_resume_deletions = vec![]; @@ -1473,10 +1503,9 @@ impl Tenant { let mut timeline_ancestors = HashMap::new(); let mut existent_timelines = HashSet::new(); for (timeline_id, preload) in preload.timelines { - if offloaded_timeline_ids.remove(&timeline_id) { - // The timeline is offloaded, skip loading it. - continue; - } + let Some(preload) = preload else { continue }; + // This is an invariant of the `preload` function's API + assert!(!offloaded_timeline_ids.contains(&timeline_id)); let index_part = match preload.index_part { Ok(i) => { debug!("remote index part exists for timeline {timeline_id}"); @@ -1586,31 +1615,13 @@ impl Tenant { .context("resume_deletion") .map_err(LoadLocalTimelineError::ResumeDeletion)?; } - // Complete deletions for offloaded timeline id's. - offloaded_timelines_list - .retain(|(offloaded_id, offloaded)| { - // At this point, offloaded_timeline_ids has the list of all offloaded timelines - // without a prefix in S3, so they are inexistent. - // In the end, existence of a timeline is finally determined by the existence of an index-part.json in remote storage. - // If there is a dangling reference in another location, they need to be cleaned up. - let delete = offloaded_timeline_ids.contains(offloaded_id); - if delete { - tracing::info!("Removing offloaded timeline {offloaded_id} from manifest as no remote prefix was found"); - offloaded.defuse_for_tenant_drop(); - } - !delete - }); - if !offloaded_timelines_list.is_empty() { - tracing::info!( - "Tenant has {} offloaded timelines", - offloaded_timelines_list.len() - ); - } + let needs_manifest_upload = + offloaded_timelines_list.len() != preload.tenant_manifest.offloaded_timelines.len(); { let mut offloaded_timelines_accessor = self.timelines_offloaded.lock().unwrap(); offloaded_timelines_accessor.extend(offloaded_timelines_list.into_iter()); } - if !offloaded_timeline_ids.is_empty() { + if needs_manifest_upload { self.store_tenant_manifest().await?; } From 3ae0b2149e1a80975e098a4156b424e636cc550f Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Wed, 20 Nov 2024 10:14:28 +0000 Subject: [PATCH 16/22] chore(proxy): demote a ton of logs for successful connection attempts (#9803) See https://github.com/neondatabase/cloud/issues/14378 In collaboration with @cloneable and @awarus, we sifted through logs and simply demoted some logs to debug. This is not at all finished and there are more logs to review, but we ran out of time in the session we organised. In any slightly more nuanced cases, we didn't touch the log, instead leaving a TODO comment. --- proxy/src/auth/backend/classic.rs | 8 +++++--- proxy/src/auth/backend/hacks.rs | 2 +- proxy/src/auth/backend/mod.rs | 7 ++++--- proxy/src/auth/credentials.rs | 10 +++++----- proxy/src/auth/flow.rs | 2 ++ proxy/src/bin/local_proxy.rs | 1 + proxy/src/bin/proxy.rs | 1 + proxy/src/cancellation.rs | 10 +++++----- proxy/src/compute.rs | 5 +++-- proxy/src/console_redirect_proxy.rs | 2 +- proxy/src/context/mod.rs | 9 +++++++-- proxy/src/control_plane/client/mod.rs | 4 ++-- proxy/src/control_plane/client/neon.rs | 13 +++++++++---- proxy/src/stream.rs | 1 + 14 files changed, 47 insertions(+), 28 deletions(-) diff --git a/proxy/src/auth/backend/classic.rs b/proxy/src/auth/backend/classic.rs index 6d26c99832..87a02133c8 100644 --- a/proxy/src/auth/backend/classic.rs +++ b/proxy/src/auth/backend/classic.rs @@ -1,5 +1,5 @@ use tokio::io::{AsyncRead, AsyncWrite}; -use tracing::{info, warn}; +use tracing::{debug, info, warn}; use super::{ComputeCredentials, ComputeUserInfo}; use crate::auth::backend::ComputeCredentialKeys; @@ -21,11 +21,11 @@ pub(super) async fn authenticate( let scram_keys = match secret { #[cfg(any(test, feature = "testing"))] AuthSecret::Md5(_) => { - info!("auth endpoint chooses MD5"); + debug!("auth endpoint chooses MD5"); return Err(auth::AuthError::bad_auth_method("MD5")); } AuthSecret::Scram(secret) => { - info!("auth endpoint chooses SCRAM"); + debug!("auth endpoint chooses SCRAM"); let scram = auth::Scram(&secret, ctx); let auth_outcome = tokio::time::timeout( @@ -50,6 +50,8 @@ pub(super) async fn authenticate( let client_key = match auth_outcome { sasl::Outcome::Success(key) => key, sasl::Outcome::Failure(reason) => { + // TODO: warnings? + // TODO: should we get rid of this because double logging? info!("auth backend failed with an error: {reason}"); return Err(auth::AuthError::password_failed(&*creds.user)); } diff --git a/proxy/src/auth/backend/hacks.rs b/proxy/src/auth/backend/hacks.rs index 1411d908a5..e651df1d34 100644 --- a/proxy/src/auth/backend/hacks.rs +++ b/proxy/src/auth/backend/hacks.rs @@ -73,7 +73,7 @@ pub(crate) async fn password_hack_no_authentication( .get_password() .await?; - info!(project = &*payload.endpoint, "received missing parameter"); + debug!(project = &*payload.endpoint, "received missing parameter"); // Report tentative success; compute node will check the password anyway. Ok(( diff --git a/proxy/src/auth/backend/mod.rs b/proxy/src/auth/backend/mod.rs index 242fe99de2..83c72e7be0 100644 --- a/proxy/src/auth/backend/mod.rs +++ b/proxy/src/auth/backend/mod.rs @@ -14,7 +14,7 @@ use ipnet::{Ipv4Net, Ipv6Net}; use local::LocalBackend; use tokio::io::{AsyncRead, AsyncWrite}; use tokio_postgres::config::AuthKeys; -use tracing::{info, warn}; +use tracing::{debug, info, warn}; use crate::auth::credentials::check_peer_addr_is_in_list; use crate::auth::{self, validate_password_and_exchange, AuthError, ComputeUserInfoMaybeEndpoint}; @@ -286,7 +286,7 @@ async fn auth_quirks( Ok(info) => (info, None), }; - info!("fetching user's authentication info"); + debug!("fetching user's authentication info"); let (allowed_ips, maybe_secret) = api.get_allowed_ips_and_secret(ctx, &info).await?; // check allowed list @@ -404,7 +404,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> { ) -> auth::Result> { let res = match self { Self::ControlPlane(api, user_info) => { - info!( + debug!( user = &*user_info.user, project = user_info.endpoint(), "performing authentication using the console" @@ -427,6 +427,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> { } }; + // TODO: replace with some metric info!("user successfully authenticated"); Ok(res) } diff --git a/proxy/src/auth/credentials.rs b/proxy/src/auth/credentials.rs index ddecae6af5..dab9007400 100644 --- a/proxy/src/auth/credentials.rs +++ b/proxy/src/auth/credentials.rs @@ -7,7 +7,7 @@ use std::str::FromStr; use itertools::Itertools; use pq_proto::StartupMessageParams; use thiserror::Error; -use tracing::{info, warn}; +use tracing::{debug, warn}; use crate::auth::password_hack::parse_endpoint_param; use crate::context::RequestMonitoring; @@ -147,22 +147,22 @@ impl ComputeUserInfoMaybeEndpoint { } let metrics = Metrics::get(); - info!(%user, "credentials"); + debug!(%user, "credentials"); if sni.is_some() { - info!("Connection with sni"); + debug!("Connection with sni"); metrics.proxy.accepted_connections_by_sni.inc(SniKind::Sni); } else if endpoint.is_some() { metrics .proxy .accepted_connections_by_sni .inc(SniKind::NoSni); - info!("Connection without sni"); + debug!("Connection without sni"); } else { metrics .proxy .accepted_connections_by_sni .inc(SniKind::PasswordHack); - info!("Connection with password hack"); + debug!("Connection with password hack"); } let options = NeonOptions::parse_params(params); diff --git a/proxy/src/auth/flow.rs b/proxy/src/auth/flow.rs index 6294549ff6..1740b59b14 100644 --- a/proxy/src/auth/flow.rs +++ b/proxy/src/auth/flow.rs @@ -178,6 +178,8 @@ impl AuthFlow<'_, S, Scram<'_>> { SCRAM_SHA_256_PLUS => ctx.set_auth_method(crate::context::AuthMethod::ScramSha256Plus), _ => {} } + + // TODO: make this a metric instead info!("client chooses {}", sasl.method); let outcome = sasl::SaslStream::new(self.stream, sasl.message) diff --git a/proxy/src/bin/local_proxy.rs b/proxy/src/bin/local_proxy.rs index 41b0e11e85..c4ec1300f2 100644 --- a/proxy/src/bin/local_proxy.rs +++ b/proxy/src/bin/local_proxy.rs @@ -125,6 +125,7 @@ async fn main() -> anyhow::Result<()> { Metrics::install(Arc::new(ThreadPoolMetrics::new(0))); + // TODO: refactor these to use labels debug!("Version: {GIT_VERSION}"); debug!("Build_tag: {BUILD_TAG}"); let neon_metrics = ::metrics::NeonMetrics::new(::metrics::BuildInfo { diff --git a/proxy/src/bin/proxy.rs b/proxy/src/bin/proxy.rs index fda5b25961..232721338d 100644 --- a/proxy/src/bin/proxy.rs +++ b/proxy/src/bin/proxy.rs @@ -288,6 +288,7 @@ async fn main() -> anyhow::Result<()> { let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook(); let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]); + // TODO: refactor these to use labels info!("Version: {GIT_VERSION}"); info!("Build_tag: {BUILD_TAG}"); let neon_metrics = ::metrics::NeonMetrics::new(::metrics::BuildInfo { diff --git a/proxy/src/cancellation.rs b/proxy/src/cancellation.rs index db0970adcb..3ad2d55b53 100644 --- a/proxy/src/cancellation.rs +++ b/proxy/src/cancellation.rs @@ -7,7 +7,7 @@ use thiserror::Error; use tokio::net::TcpStream; use tokio::sync::Mutex; use tokio_postgres::{CancelToken, NoTls}; -use tracing::info; +use tracing::{debug, info}; use uuid::Uuid; use crate::error::ReportableError; @@ -73,7 +73,7 @@ impl CancellationHandler

{ break key; }; - info!("registered new query cancellation key {key}"); + debug!("registered new query cancellation key {key}"); Session { key, cancellation_handler: self, @@ -165,7 +165,7 @@ impl CancelClosure { pub(crate) async fn try_cancel_query(self) -> Result<(), CancelError> { let socket = TcpStream::connect(self.socket_addr).await?; self.cancel_token.cancel_query_raw(socket, NoTls).await?; - info!("query was cancelled"); + debug!("query was cancelled"); Ok(()) } } @@ -182,7 +182,7 @@ impl

Session

{ /// Store the cancel token for the given session. /// This enables query cancellation in `crate::proxy::prepare_client_connection`. pub(crate) fn enable_query_cancellation(&self, cancel_closure: CancelClosure) -> CancelKeyData { - info!("enabling query cancellation for this session"); + debug!("enabling query cancellation for this session"); self.cancellation_handler .map .insert(self.key, Some(cancel_closure)); @@ -194,7 +194,7 @@ impl

Session

{ impl

Drop for Session

{ fn drop(&mut self) { self.cancellation_handler.map.remove(&self.key); - info!("dropped query cancellation key {}", &self.key); + debug!("dropped query cancellation key {}", &self.key); } } diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index ca4a348ed8..b8876b44eb 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -14,7 +14,7 @@ use thiserror::Error; use tokio::net::TcpStream; use tokio_postgres::tls::MakeTlsConnect; use tokio_postgres_rustls::MakeRustlsConnect; -use tracing::{error, info, warn}; +use tracing::{debug, error, info, warn}; use crate::auth::parse_endpoint_param; use crate::cancellation::CancelClosure; @@ -213,7 +213,7 @@ impl ConnCfg { }; let connect_once = |host, port| { - info!("trying to connect to compute node at {host}:{port}"); + debug!("trying to connect to compute node at {host}:{port}"); connect_with_timeout(host, port).and_then(|socket| async { let socket_addr = socket.peer_addr()?; // This prevents load balancer from severing the connection. @@ -328,6 +328,7 @@ impl ConnCfg { tracing::Span::current().record("pid", tracing::field::display(client.get_process_id())); let stream = connection.stream.into_inner(); + // TODO: lots of useful info but maybe we can move it elsewhere (eg traces?) info!( cold_start_info = ctx.cold_start_info().as_str(), "connected to compute node at {host} ({socket_addr}) sslmode={:?}", diff --git a/proxy/src/console_redirect_proxy.rs b/proxy/src/console_redirect_proxy.rs index cc456f3667..8e71f552a5 100644 --- a/proxy/src/console_redirect_proxy.rs +++ b/proxy/src/console_redirect_proxy.rs @@ -146,7 +146,7 @@ pub(crate) async fn handle_client( stream: S, conn_gauge: NumClientConnectionsGuard<'static>, ) -> Result>, ClientRequestError> { - info!( + debug!( protocol = %ctx.protocol(), "handling interactive connection from client" ); diff --git a/proxy/src/context/mod.rs b/proxy/src/context/mod.rs index 6cf99c0c97..d057ee0bfd 100644 --- a/proxy/src/context/mod.rs +++ b/proxy/src/context/mod.rs @@ -8,7 +8,7 @@ use pq_proto::StartupMessageParams; use smol_str::SmolStr; use tokio::sync::mpsc; use tracing::field::display; -use tracing::{debug, info, info_span, Span}; +use tracing::{debug, info_span, Span}; use try_lock::TryLock; use uuid::Uuid; @@ -122,6 +122,7 @@ impl RequestMonitoring { protocol: Protocol, region: &'static str, ) -> Self { + // TODO: be careful with long lived spans let span = info_span!( "connect_request", %protocol, @@ -384,6 +385,10 @@ impl RequestMonitoringInner { } else { ConnectOutcome::Failed }; + + // TODO: get rid of entirely/refactor + // check for false positives + // AND false negatives if let Some(rejected) = self.rejected { let ep = self .endpoint_id @@ -391,7 +396,7 @@ impl RequestMonitoringInner { .map(|x| x.as_str()) .unwrap_or_default(); // This makes sense only if cache is disabled - info!( + debug!( ?outcome, ?rejected, ?ep, diff --git a/proxy/src/control_plane/client/mod.rs b/proxy/src/control_plane/client/mod.rs index e388d8a538..50903e2f1e 100644 --- a/proxy/src/control_plane/client/mod.rs +++ b/proxy/src/control_plane/client/mod.rs @@ -8,7 +8,7 @@ use std::time::Duration; use dashmap::DashMap; use tokio::time::Instant; -use tracing::info; +use tracing::{debug, info}; use crate::auth::backend::jwt::{AuthRule, FetchAuthRules, FetchAuthRulesError}; use crate::auth::backend::ComputeUserInfo; @@ -214,7 +214,7 @@ impl ApiLocks { self.metrics .semaphore_acquire_seconds .observe(now.elapsed().as_secs_f64()); - info!("acquired permit {:?}", now.elapsed().as_secs_f64()); + debug!("acquired permit {:?}", now.elapsed().as_secs_f64()); Ok(WakeComputePermit { permit: permit? }) } diff --git a/proxy/src/control_plane/client/neon.rs b/proxy/src/control_plane/client/neon.rs index 26ff4e1402..8f4ae13f33 100644 --- a/proxy/src/control_plane/client/neon.rs +++ b/proxy/src/control_plane/client/neon.rs @@ -73,6 +73,8 @@ impl NeonControlPlaneClient { .endpoints_cache .is_valid(ctx, &user_info.endpoint.normalize()) { + // TODO: refactor this because it's weird + // this is a failure to authenticate but we return Ok. info!("endpoint is not valid, skipping the request"); return Ok(AuthInfo::default()); } @@ -92,7 +94,7 @@ impl NeonControlPlaneClient { ]) .build()?; - info!(url = request.url().as_str(), "sending http request"); + debug!(url = request.url().as_str(), "sending http request"); let start = Instant::now(); let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane); let response = self.endpoint.execute(request).await?; @@ -104,10 +106,12 @@ impl NeonControlPlaneClient { // TODO(anna): retry Err(e) => { return if e.get_reason().is_not_found() { + // TODO: refactor this because it's weird + // this is a failure to authenticate but we return Ok. Ok(AuthInfo::default()) } else { Err(e.into()) - } + }; } }; @@ -163,7 +167,7 @@ impl NeonControlPlaneClient { .build() .map_err(GetEndpointJwksError::RequestBuild)?; - info!(url = request.url().as_str(), "sending http request"); + debug!(url = request.url().as_str(), "sending http request"); let start = Instant::now(); let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane); let response = self @@ -220,7 +224,7 @@ impl NeonControlPlaneClient { let request = request_builder.build()?; - info!(url = request.url().as_str(), "sending http request"); + debug!(url = request.url().as_str(), "sending http request"); let start = Instant::now(); let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane); let response = self.endpoint.execute(request).await?; @@ -249,6 +253,7 @@ impl NeonControlPlaneClient { Ok(node) } .map_err(crate::error::log_error) + // TODO: redo this span stuff .instrument(info_span!("http", id = request_id)) .await } diff --git a/proxy/src/stream.rs b/proxy/src/stream.rs index 89df48c5d3..11f426819d 100644 --- a/proxy/src/stream.rs +++ b/proxy/src/stream.rs @@ -133,6 +133,7 @@ impl PqStream { msg: &'static str, error_kind: ErrorKind, ) -> Result { + // TODO: only log this for actually interesting errors tracing::info!( kind = error_kind.to_metric_label(), msg, From 33dce25af8ea722b3bf53467616fb5156dd41249 Mon Sep 17 00:00:00 2001 From: John Spray Date: Wed, 20 Nov 2024 11:07:45 +0000 Subject: [PATCH 17/22] safekeeper: block deletion on protocol handler shutdown (#9364) ## Problem Two recently observed log errors indicate safekeeper tasks for a timeline running after that timeline's deletion has started. - https://github.com/neondatabase/neon/issues/8972 - https://github.com/neondatabase/neon/issues/8974 These code paths do not have a mechanism that coordinates task shutdown with the overall shutdown of the timeline. ## Summary of changes - Add a `Gate` to `Timeline` - Take the gate as part of resident timeline guard: any code that holds a guard over a timeline staying resident should also hold a guard over the timeline's total lifetime. - Take the gate from the wal removal task - Respect Timeline::cancel in WAL send/recv code, so that we do not block shutdown indefinitely. - Add a test that deletes timelines with open pageserver+compute connections, to check these get torn down as expected. There is some risk to introducing gates: if there is code holding a gate which does not properly respect a cancellation token, it can cause shutdown hangs. The risk of this for safekeepers is lower in practice than it is for other services, because in a healthy timeline deletion, the compute is shutdown first, then the timeline is deleted on the pageserver, and finally it is deleted on the safekeepers -- that makes it much less likely that some protocol handler will still be running. Closes: #8972 Closes: #8974 --- libs/postgres_backend/src/lib.rs | 7 +- safekeeper/src/receive_wal.rs | 29 +++++- safekeeper/src/send_wal.rs | 37 ++++--- safekeeper/src/timeline.rs | 120 +++++++++-------------- safekeeper/src/timeline_guard.rs | 11 ++- safekeeper/src/timeline_manager.rs | 55 +++++++++-- safekeeper/src/timelines_global_map.rs | 4 +- safekeeper/src/wal_backup.rs | 42 +++++--- test_runner/regress/test_wal_acceptor.py | 83 ++++++++++++++++ 9 files changed, 270 insertions(+), 118 deletions(-) diff --git a/libs/postgres_backend/src/lib.rs b/libs/postgres_backend/src/lib.rs index 9075a019b4..8c024375c1 100644 --- a/libs/postgres_backend/src/lib.rs +++ b/libs/postgres_backend/src/lib.rs @@ -834,7 +834,7 @@ impl PostgresBackend { use CopyStreamHandlerEnd::*; let expected_end = match &end { - ServerInitiated(_) | CopyDone | CopyFail | Terminate | EOF => true, + ServerInitiated(_) | CopyDone | CopyFail | Terminate | EOF | Cancelled => true, CopyStreamHandlerEnd::Disconnected(ConnectionError::Io(io_error)) if is_expected_io_error(io_error) => { @@ -874,6 +874,9 @@ impl PostgresBackend { // message from server' when it receives ErrorResponse (anything but // CopyData/CopyDone) back. CopyFail => Some((end.to_string(), SQLSTATE_SUCCESSFUL_COMPLETION)), + + // When cancelled, send no response: we must not risk blocking on sending that response + Cancelled => None, _ => None, }; if let Some((err, errcode)) = err_to_send_and_errcode { @@ -1051,6 +1054,8 @@ pub enum CopyStreamHandlerEnd { /// The connection was lost #[error("connection error: {0}")] Disconnected(#[from] ConnectionError), + #[error("Shutdown")] + Cancelled, /// Some other error #[error(transparent)] Other(#[from] anyhow::Error), diff --git a/safekeeper/src/receive_wal.rs b/safekeeper/src/receive_wal.rs index 2edcc4ef6f..bfa1764abf 100644 --- a/safekeeper/src/receive_wal.rs +++ b/safekeeper/src/receive_wal.rs @@ -239,6 +239,10 @@ impl SafekeeperPostgresHandler { pgb: &mut PostgresBackend, tli: &mut Option, ) -> Result<(), CopyStreamHandlerEnd> { + // The `tli` parameter is only used for passing _out_ a timeline, one should + // not have been passed in. + assert!(tli.is_none()); + // Notify the libpq client that it's allowed to send `CopyData` messages pgb.write_message(&BeMessage::CopyBothResponse).await?; @@ -256,6 +260,7 @@ impl SafekeeperPostgresHandler { // sends, so this avoids deadlocks. let mut pgb_reader = pgb.split().context("START_WAL_PUSH split")?; let peer_addr = *pgb.get_peer_addr(); + let mut network_reader = NetworkReader { ttid: self.ttid, conn_id: self.conn_id, @@ -275,10 +280,14 @@ impl SafekeeperPostgresHandler { .subscribe(); *tli = Some(timeline.wal_residence_guard().await?); + let timeline_cancel = timeline.cancel.clone(); tokio::select! { // todo: add read|write .context to these errors r = network_reader.run(msg_tx, msg_rx, reply_tx, timeline, next_msg) => r, r = network_write(pgb, reply_rx, pageserver_feedback_rx) => r, + _ = timeline_cancel.cancelled() => { + return Err(CopyStreamHandlerEnd::Cancelled); + } } } else { res.map(|_| ()) @@ -303,7 +312,7 @@ impl SafekeeperPostgresHandler { // Otherwise, WalAcceptor thread must have errored. match wal_acceptor_res { - Ok(Ok(_)) => Ok(()), // can't happen currently; would be if we add graceful termination + Ok(Ok(_)) => Ok(()), // Clean shutdown Ok(Err(e)) => Err(CopyStreamHandlerEnd::Other(e.context("WAL acceptor"))), Err(_) => Err(CopyStreamHandlerEnd::Other(anyhow!( "WalAcceptor task panicked", @@ -356,6 +365,7 @@ impl<'a, IO: AsyncRead + AsyncWrite + Unpin> NetworkReader<'a, IO> { Ok((tli, next_msg)) } + /// This function is cancellation-safe (only does network I/O and channel read/writes). async fn run( self, msg_tx: Sender, @@ -397,6 +407,7 @@ async fn read_network_loop( loop { let started = Instant::now(); let size = next_msg.size(); + match msg_tx.send_timeout(next_msg, SLOW_THRESHOLD).await { Ok(()) => {} // Slow send, log a message and keep trying. Log context has timeline ID. @@ -428,6 +439,8 @@ async fn read_network_loop( /// Read replies from WalAcceptor and pass them back to socket. Returns Ok(()) /// if reply_rx closed; it must mean WalAcceptor terminated, joining it should /// tell the error. +/// +/// This function is cancellation-safe (only does network I/O and channel read/writes). async fn network_write( pgb_writer: &mut PostgresBackend, mut reply_rx: Receiver, @@ -461,7 +474,7 @@ async fn network_write( Some(AcceptorProposerMessage::AppendResponse(append_response)) } _ => None, - } + }, }; let Some(msg) = msg else { @@ -527,6 +540,10 @@ impl WalAcceptor { /// The main loop. Returns Ok(()) if either msg_rx or reply_tx got closed; /// it must mean that network thread terminated. + /// + /// This function is *not* cancellation safe, it does local disk I/O: it should always + /// be allowed to run to completion. It respects Timeline::cancel and shuts down cleanly + /// when that gets triggered. async fn run(&mut self) -> anyhow::Result<()> { let walreceiver_guard = self.tli.get_walreceivers().register(self.conn_id); @@ -541,7 +558,7 @@ impl WalAcceptor { // Tracks whether we have unflushed appends. let mut dirty = false; - loop { + while !self.tli.is_cancelled() { let reply = tokio::select! { // Process inbound message. msg = self.msg_rx.recv() => { @@ -599,6 +616,10 @@ impl WalAcceptor { WAL_RECEIVER_QUEUE_DEPTH.observe(self.msg_rx.len() as f64); None // no reply } + + _ = self.tli.cancel.cancelled() => { + break; + } }; // Send reply, if any. @@ -610,7 +631,7 @@ impl WalAcceptor { } // Flush WAL on disconnect, see https://github.com/neondatabase/neon/issues/9259. - if dirty { + if dirty && !self.tli.cancel.is_cancelled() { self.tli .process_msg(&ProposerAcceptorMessage::FlushWAL) .await?; diff --git a/safekeeper/src/send_wal.rs b/safekeeper/src/send_wal.rs index 6d94ff98b1..aa65ec851b 100644 --- a/safekeeper/src/send_wal.rs +++ b/safekeeper/src/send_wal.rs @@ -456,6 +456,8 @@ impl SafekeeperPostgresHandler { // not synchronized with sends, so this avoids deadlocks. let reader = pgb.split().context("START_REPLICATION split")?; + let tli_cancel = tli.cancel.clone(); + let mut sender = WalSender { pgb, // should succeed since we're already holding another guard @@ -479,6 +481,9 @@ impl SafekeeperPostgresHandler { // todo: add read|write .context to these errors r = sender.run() => r, r = reply_reader.run() => r, + _ = tli_cancel.cancelled() => { + return Err(CopyStreamHandlerEnd::Cancelled); + } }; let ws_state = ws_guard @@ -557,6 +562,7 @@ impl WalSender<'_, IO> { /// Send WAL until /// - an error occurs /// - receiver is caughtup and there is no computes (if streaming up to commit_lsn) + /// - timeline's cancellation token fires /// /// Err(CopyStreamHandlerEnd) is always returned; Result is used only for ? /// convenience. @@ -601,15 +607,14 @@ impl WalSender<'_, IO> { }; let send_buf = &send_buf[..send_size]; - // and send it - self.pgb - .write_message(&BeMessage::XLogData(XLogDataBody { - wal_start: self.start_pos.0, - wal_end: self.end_pos.0, - timestamp: get_current_timestamp(), - data: send_buf, - })) - .await?; + // and send it, while respecting Timeline::cancel + let msg = BeMessage::XLogData(XLogDataBody { + wal_start: self.start_pos.0, + wal_end: self.end_pos.0, + timestamp: get_current_timestamp(), + data: send_buf, + }); + self.pgb.write_message(&msg).await?; if let Some(appname) = &self.appname { if appname == "replica" { @@ -674,13 +679,13 @@ impl WalSender<'_, IO> { } } - self.pgb - .write_message(&BeMessage::KeepAlive(WalSndKeepAlive { - wal_end: self.end_pos.0, - timestamp: get_current_timestamp(), - request_reply: true, - })) - .await?; + let msg = BeMessage::KeepAlive(WalSndKeepAlive { + wal_end: self.end_pos.0, + timestamp: get_current_timestamp(), + request_reply: true, + }); + + self.pgb.write_message(&msg).await?; } } diff --git a/safekeeper/src/timeline.rs b/safekeeper/src/timeline.rs index 85add6bfea..ef928f7633 100644 --- a/safekeeper/src/timeline.rs +++ b/safekeeper/src/timeline.rs @@ -9,6 +9,7 @@ use serde::{Deserialize, Serialize}; use tokio::fs::{self}; use tokio_util::sync::CancellationToken; use utils::id::TenantId; +use utils::sync::gate::Gate; use std::cmp::max; use std::ops::{Deref, DerefMut}; @@ -467,6 +468,10 @@ pub struct Timeline { timeline_dir: Utf8PathBuf, manager_ctl: ManagerCtl, + /// Hold this gate from code that depends on the Timeline's non-shut-down state. While holding + /// this gate, you must respect [`Timeline::cancel`] + pub(crate) gate: Gate, + /// Delete/cancel will trigger this, background tasks should drop out as soon as it fires pub(crate) cancel: CancellationToken, @@ -508,6 +513,7 @@ impl Timeline { mutex: RwLock::new(shared_state), walsenders: WalSenders::new(walreceivers.clone()), walreceivers, + gate: Default::default(), cancel: CancellationToken::default(), manager_ctl: ManagerCtl::new(), broker_active: AtomicBool::new(false), @@ -533,56 +539,6 @@ impl Timeline { )) } - /// Initialize fresh timeline on disk and start background tasks. If init - /// fails, timeline is cancelled and cannot be used anymore. - /// - /// Init is transactional, so if it fails, created files will be deleted, - /// and state on disk should remain unchanged. - pub async fn init_new( - self: &Arc, - shared_state: &mut WriteGuardSharedState<'_>, - conf: &SafeKeeperConf, - broker_active_set: Arc, - partial_backup_rate_limiter: RateLimiter, - ) -> Result<()> { - match fs::metadata(&self.timeline_dir).await { - Ok(_) => { - // Timeline directory exists on disk, we should leave state unchanged - // and return error. - bail!(TimelineError::Invalid(self.ttid)); - } - Err(e) if e.kind() == std::io::ErrorKind::NotFound => {} - Err(e) => { - return Err(e.into()); - } - } - - // Create timeline directory. - fs::create_dir_all(&self.timeline_dir).await?; - - // Write timeline to disk and start background tasks. - if let Err(e) = shared_state.sk.state_mut().flush().await { - // Bootstrap failed, cancel timeline and remove timeline directory. - self.cancel(shared_state); - - if let Err(fs_err) = fs::remove_dir_all(&self.timeline_dir).await { - warn!( - "failed to remove timeline {} directory after bootstrap failure: {}", - self.ttid, fs_err - ); - } - - return Err(e); - } - self.bootstrap( - shared_state, - conf, - broker_active_set, - partial_backup_rate_limiter, - ); - Ok(()) - } - /// Bootstrap new or existing timeline starting background tasks. pub fn bootstrap( self: &Arc, @@ -593,33 +549,61 @@ impl Timeline { ) { let (tx, rx) = self.manager_ctl.bootstrap_manager(); + let Ok(gate_guard) = self.gate.enter() else { + // Init raced with shutdown + return; + }; + // Start manager task which will monitor timeline state and update // background tasks. - tokio::spawn(timeline_manager::main_task( - ManagerTimeline { tli: self.clone() }, - conf.clone(), - broker_active_set, - tx, - rx, - partial_backup_rate_limiter, - )); + tokio::spawn({ + let this = self.clone(); + let conf = conf.clone(); + async move { + let _gate_guard = gate_guard; + timeline_manager::main_task( + ManagerTimeline { tli: this }, + conf, + broker_active_set, + tx, + rx, + partial_backup_rate_limiter, + ) + .await + } + }); + } + + /// Background timeline activities (which hold Timeline::gate) will no + /// longer run once this function completes. + pub async fn shutdown(&self) { + info!("timeline {} shutting down", self.ttid); + self.cancel.cancel(); + + // Wait for any concurrent tasks to stop using this timeline, to avoid e.g. attempts + // to read deleted files. + self.gate.close().await; } /// Delete timeline from disk completely, by removing timeline directory. - /// Background timeline activities will stop eventually. /// /// Also deletes WAL in s3. Might fail if e.g. s3 is unavailable, but /// deletion API endpoint is retriable. + /// + /// Timeline must be in shut-down state (i.e. call [`Self::shutdown`] first) pub async fn delete( &self, shared_state: &mut WriteGuardSharedState<'_>, only_local: bool, ) -> Result { - self.cancel(shared_state); + // Assert that [`Self::shutdown`] was already called + assert!(self.cancel.is_cancelled()); + assert!(self.gate.close_complete()); + + // Close associated FDs. Nobody will be able to touch timeline data once + // it is cancelled, so WAL storage won't be opened again. + shared_state.sk.close_wal_store(); - // TODO: It's better to wait for s3 offloader termination before - // removing data from s3. Though since s3 doesn't have transactions it - // still wouldn't guarantee absense of data after removal. let conf = GlobalTimelines::get_global_config(); if !only_local && conf.is_wal_backup_enabled() { // Note: we concurrently delete remote storage data from multiple @@ -631,16 +615,6 @@ impl Timeline { Ok(dir_existed) } - /// Cancel timeline to prevent further usage. Background tasks will stop - /// eventually after receiving cancellation signal. - fn cancel(&self, shared_state: &mut WriteGuardSharedState<'_>) { - info!("timeline {} is cancelled", self.ttid); - self.cancel.cancel(); - // Close associated FDs. Nobody will be able to touch timeline data once - // it is cancelled, so WAL storage won't be opened again. - shared_state.sk.close_wal_store(); - } - /// Returns if timeline is cancelled. pub fn is_cancelled(&self) -> bool { self.cancel.is_cancelled() diff --git a/safekeeper/src/timeline_guard.rs b/safekeeper/src/timeline_guard.rs index 1ddac573d2..9102a40df8 100644 --- a/safekeeper/src/timeline_guard.rs +++ b/safekeeper/src/timeline_guard.rs @@ -7,6 +7,7 @@ use std::collections::HashSet; use tracing::debug; +use utils::sync::gate::GateGuard; use crate::timeline_manager::ManagerCtlMessage; @@ -16,6 +17,12 @@ pub struct GuardId(u64); pub struct ResidenceGuard { manager_tx: tokio::sync::mpsc::UnboundedSender, guard_id: GuardId, + + /// [`ResidenceGuard`] represents a guarantee that a timeline's data remains resident, + /// which by extension also means the timeline is not shut down (since after shut down + /// our data may be deleted). Therefore everyone holding a residence guard must also + /// hold a guard on [`crate::timeline::Timeline::gate`] + _gate_guard: GateGuard, } impl Drop for ResidenceGuard { @@ -52,7 +59,8 @@ impl AccessService { self.guards.is_empty() } - pub(crate) fn create_guard(&mut self) -> ResidenceGuard { + /// `timeline_gate_guard` is a guarantee that the timeline is not shut down + pub(crate) fn create_guard(&mut self, timeline_gate_guard: GateGuard) -> ResidenceGuard { let guard_id = self.next_guard_id; self.next_guard_id += 1; self.guards.insert(guard_id); @@ -63,6 +71,7 @@ impl AccessService { ResidenceGuard { manager_tx: self.manager_tx.clone(), guard_id, + _gate_guard: timeline_gate_guard, } } diff --git a/safekeeper/src/timeline_manager.rs b/safekeeper/src/timeline_manager.rs index e9fed21bf5..c02fb904cf 100644 --- a/safekeeper/src/timeline_manager.rs +++ b/safekeeper/src/timeline_manager.rs @@ -266,8 +266,10 @@ pub async fn main_task( // Start recovery task which always runs on the timeline. if !mgr.is_offloaded && mgr.conf.peer_recovery_enabled { - let tli = mgr.wal_resident_timeline(); - mgr.recovery_task = Some(tokio::spawn(recovery_main(tli, mgr.conf.clone()))); + // Recovery task is only spawned if we can get a residence guard (i.e. timeline is not already shutting down) + if let Ok(tli) = mgr.wal_resident_timeline() { + mgr.recovery_task = Some(tokio::spawn(recovery_main(tli, mgr.conf.clone()))); + } } // If timeline is evicted, reflect that in the metric. @@ -375,6 +377,13 @@ pub async fn main_task( // shutdown background tasks if mgr.conf.is_wal_backup_enabled() { + if let Some(backup_task) = mgr.backup_task.take() { + // If we fell through here, then the timeline is shutting down. This is important + // because otherwise joining on the wal_backup handle might hang. + assert!(mgr.tli.cancel.is_cancelled()); + + backup_task.join().await; + } wal_backup::update_task(&mut mgr, false, &last_state).await; } @@ -442,10 +451,18 @@ impl Manager { /// Get a WalResidentTimeline. /// Manager code must use this function instead of one from `Timeline` /// directly, because it will deadlock. - pub(crate) fn wal_resident_timeline(&mut self) -> WalResidentTimeline { + /// + /// This function is fallible because the guard may not be created if the timeline is + /// shutting down. + pub(crate) fn wal_resident_timeline(&mut self) -> anyhow::Result { assert!(!self.is_offloaded); - let guard = self.access_service.create_guard(); - WalResidentTimeline::new(self.tli.clone(), guard) + let guard = self.access_service.create_guard( + self.tli + .gate + .enter() + .map_err(|_| anyhow::anyhow!("Timeline shutting down"))?, + ); + Ok(WalResidentTimeline::new(self.tli.clone(), guard)) } /// Get a snapshot of the timeline state. @@ -559,6 +576,11 @@ impl Manager { if removal_horizon_segno > self.last_removed_segno { // we need to remove WAL + let Ok(timeline_gate_guard) = self.tli.gate.enter() else { + tracing::info!("Timeline shutdown, not spawning WAL removal task"); + return; + }; + let remover = match self.tli.read_shared_state().await.sk { StateSK::Loaded(ref sk) => { crate::wal_storage::Storage::remove_up_to(&sk.wal_store, removal_horizon_segno) @@ -573,6 +595,8 @@ impl Manager { self.wal_removal_task = Some(tokio::spawn( async move { + let _timeline_gate_guard = timeline_gate_guard; + remover.await?; Ok(removal_horizon_segno) } @@ -619,10 +643,15 @@ impl Manager { return; } + let Ok(resident) = self.wal_resident_timeline() else { + // Shutting down + return; + }; + // Get WalResidentTimeline and start partial backup task. let cancel = CancellationToken::new(); let handle = tokio::spawn(wal_backup_partial::main_task( - self.wal_resident_timeline(), + resident, self.conf.clone(), self.global_rate_limiter.clone(), cancel.clone(), @@ -664,7 +693,7 @@ impl Manager { self.partial_backup_task = None; } - let tli = self.wal_resident_timeline(); + let tli = self.wal_resident_timeline()?; let mut partial_backup = PartialBackup::new(tli, self.conf.clone()).await; // Reset might fail e.g. when cfile is already reset but s3 removal // failed, so set manager state to None beforehand. In any case caller @@ -688,7 +717,12 @@ impl Manager { let guard = if self.is_offloaded { Err(anyhow::anyhow!("timeline is offloaded, can't get a guard")) } else { - Ok(self.access_service.create_guard()) + match self.tli.gate.enter() { + Ok(gate_guard) => Ok(self.access_service.create_guard(gate_guard)), + Err(_) => Err(anyhow::anyhow!( + "timeline is shutting down, can't get a guard" + )), + } }; if tx.send(guard).is_err() { @@ -699,7 +733,10 @@ impl Manager { let result = if self.is_offloaded { None } else { - Some(self.access_service.create_guard()) + match self.tli.gate.enter() { + Ok(gate_guard) => Some(self.access_service.create_guard(gate_guard)), + Err(_) => None, + } }; if tx.send(result).is_err() { diff --git a/safekeeper/src/timelines_global_map.rs b/safekeeper/src/timelines_global_map.rs index 33d94da034..067945fd5f 100644 --- a/safekeeper/src/timelines_global_map.rs +++ b/safekeeper/src/timelines_global_map.rs @@ -457,10 +457,12 @@ impl GlobalTimelines { Ok(timeline) => { let was_active = timeline.broker_active.load(Ordering::Relaxed); + info!("deleting timeline {}, only_local={}", ttid, only_local); + timeline.shutdown().await; + // Take a lock and finish the deletion holding this mutex. let mut shared_state = timeline.write_shared_state().await; - info!("deleting timeline {}, only_local={}", ttid, only_local); let dir_existed = timeline.delete(&mut shared_state, only_local).await?; Ok(TimelineDeleteForceResult { diff --git a/safekeeper/src/wal_backup.rs b/safekeeper/src/wal_backup.rs index 6c87e5a926..34b5dbeaa1 100644 --- a/safekeeper/src/wal_backup.rs +++ b/safekeeper/src/wal_backup.rs @@ -25,7 +25,6 @@ use tokio::fs::File; use tokio::select; use tokio::sync::mpsc::{self, Receiver, Sender}; use tokio::sync::{watch, OnceCell}; -use tokio::time::sleep; use tracing::*; use utils::{id::TenantTimelineId, lsn::Lsn}; @@ -46,6 +45,14 @@ pub struct WalBackupTaskHandle { handle: JoinHandle<()>, } +impl WalBackupTaskHandle { + pub(crate) async fn join(self) { + if let Err(e) = self.handle.await { + error!("WAL backup task panicked: {}", e); + } + } +} + /// Do we have anything to upload to S3, i.e. should safekeepers run backup activity? pub(crate) fn is_wal_backup_required( wal_seg_size: usize, @@ -74,11 +81,12 @@ pub(crate) async fn update_task(mgr: &mut Manager, need_backup: bool, state: &St let (shutdown_tx, shutdown_rx) = mpsc::channel(1); - let async_task = backup_task_main( - mgr.wal_resident_timeline(), - mgr.conf.backup_parallel_jobs, - shutdown_rx, - ); + let Ok(resident) = mgr.wal_resident_timeline() else { + info!("Timeline shut down"); + return; + }; + + let async_task = backup_task_main(resident, mgr.conf.backup_parallel_jobs, shutdown_rx); let handle = if mgr.conf.current_thread_runtime { tokio::spawn(async_task) @@ -108,9 +116,7 @@ async fn shut_down_task(entry: &mut Option) { // Tell the task to shutdown. Error means task exited earlier, that's ok. let _ = wb_handle.shutdown_tx.send(()).await; // Await the task itself. TODO: restart panicked tasks earlier. - if let Err(e) = wb_handle.handle.await { - warn!("WAL backup task panicked: {}", e); - } + wb_handle.join().await; } } @@ -214,6 +220,7 @@ async fn backup_task_main( let _guard = WAL_BACKUP_TASKS.guard(); info!("started"); + let cancel = tli.tli.cancel.clone(); let mut wb = WalBackupTask { wal_seg_size: tli.get_wal_seg_size().await, commit_lsn_watch_rx: tli.get_commit_lsn_watch_rx(), @@ -230,25 +237,34 @@ async fn backup_task_main( _ = wb.run() => {} _ = shutdown_rx.recv() => { canceled = true; + }, + _ = cancel.cancelled() => { + canceled = true; } } info!("task {}", if canceled { "canceled" } else { "terminated" }); } impl WalBackupTask { + /// This function must be called from a select! that also respects self.timeline's + /// cancellation token. This is done in [`backup_task_main`]. + /// + /// The future returned by this function is safe to drop at any time because it + /// does not write to local disk. async fn run(&mut self) { let mut backup_lsn = Lsn(0); let mut retry_attempt = 0u32; // offload loop - loop { + while !self.timeline.cancel.is_cancelled() { if retry_attempt == 0 { // wait for new WAL to arrive if let Err(e) = self.commit_lsn_watch_rx.changed().await { - // should never happen, as we hold Arc to timeline. + // should never happen, as we hold Arc to timeline and transmitter's lifetime + // is within Timeline's error!("commit_lsn watch shut down: {:?}", e); return; - } + }; } else { // or just sleep if we errored previously let mut retry_delay = UPLOAD_FAILURE_RETRY_MAX_MS; @@ -256,7 +272,7 @@ impl WalBackupTask { { retry_delay = min(retry_delay, backoff_delay); } - sleep(Duration::from_millis(retry_delay)).await; + tokio::time::sleep(Duration::from_millis(retry_delay)).await; } let commit_lsn = *self.commit_lsn_watch_rx.borrow(); diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index 0676b3dd9a..6eaaa3c37f 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -1784,6 +1784,89 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): cur.execute("INSERT INTO t (key) VALUES (123)") +def test_delete_timeline_under_load(neon_env_builder: NeonEnvBuilder): + """ + Test deleting timelines on a safekeeper while they're under load. + + This should not happen under normal operation, but it can happen if + there is some rogue compute/pageserver that is writing/reading to a + safekeeper that we're migrating a timeline away from, or if the timeline + is being deleted while such a rogue client is running. + """ + neon_env_builder.auth_enabled = True + env = neon_env_builder.init_start() + + # Create two endpoints that will generate load + timeline_id_a = env.create_branch("deleteme_a") + timeline_id_b = env.create_branch("deleteme_b") + + endpoint_a = env.endpoints.create("deleteme_a") + endpoint_a.start() + endpoint_b = env.endpoints.create("deleteme_b") + endpoint_b.start() + + # Get tenant and timeline IDs + tenant_id = env.initial_tenant + + # Start generating load on both timelines + def generate_load(endpoint: Endpoint): + with closing(endpoint.connect()) as conn: + with conn.cursor() as cur: + cur.execute("CREATE TABLE IF NOT EXISTS t(key int, value text)") + while True: + try: + cur.execute("INSERT INTO t SELECT generate_series(1,1000), 'data'") + except: # noqa + # Ignore errors since timeline may be deleted + break + + t_a = threading.Thread(target=generate_load, args=(endpoint_a,)) + t_b = threading.Thread(target=generate_load, args=(endpoint_b,)) + try: + t_a.start() + t_b.start() + + # Let the load run for a bit + log.info("Warming up...") + time.sleep(2) + + # Safekeeper errors will propagate to the pageserver: it is correct that these are + # logged at error severity because they indicate the pageserver is trying to read + # a timeline that it shouldn't. + env.pageserver.allowed_errors.extend( + [ + ".*Timeline.*was cancelled.*", + ".*Timeline.*was not found.*", + ] + ) + + # Try deleting timelines while under load + sk = env.safekeepers[0] + sk_http = sk.http_client(auth_token=env.auth_keys.generate_tenant_token(tenant_id)) + + # Delete first timeline + log.info(f"Deleting {timeline_id_a}...") + assert sk_http.timeline_delete(tenant_id, timeline_id_a, only_local=True)["dir_existed"] + + # Delete second timeline + log.info(f"Deleting {timeline_id_b}...") + assert sk_http.timeline_delete(tenant_id, timeline_id_b, only_local=True)["dir_existed"] + + # Verify timelines are gone from disk + sk_data_dir = sk.data_dir + assert not (sk_data_dir / str(tenant_id) / str(timeline_id_a)).exists() + # assert not (sk_data_dir / str(tenant_id) / str(timeline_id_b)).exists() + + finally: + log.info("Stopping endpoints...") + # Stop endpoints with immediate mode because we deleted the timeline out from under the compute, which may cause it to hang + endpoint_a.stop(mode="immediate") + endpoint_b.stop(mode="immediate") + log.info("Joining threads...") + t_a.join() + t_b.join() + + # Basic pull_timeline test. # When live_sk_change is False, compute is restarted to change set of # safekeepers; otherwise it is live reload. From 94e4a0e2a0d43e066bd006a68eb147333ab0d074 Mon Sep 17 00:00:00 2001 From: Fedor Dikarev Date: Wed, 20 Nov 2024 13:04:14 +0100 Subject: [PATCH 18/22] update macos version for runner (#9817) Closes: https://github.com/neondatabase/neon/issues/9816 Run MacOs builds on `macos-15`. As `pkg-config` is bundled in runner image, don't install it with `brew` --- .github/workflows/neon_extra_builds.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/neon_extra_builds.yml b/.github/workflows/neon_extra_builds.yml index e827539c80..092831adb9 100644 --- a/.github/workflows/neon_extra_builds.yml +++ b/.github/workflows/neon_extra_builds.yml @@ -38,7 +38,7 @@ jobs: contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') || github.ref_name == 'main' timeout-minutes: 90 - runs-on: macos-14 + runs-on: macos-15 env: # Use release build only, to have less debug info around @@ -52,7 +52,7 @@ jobs: submodules: true - name: Install macOS postgres dependencies - run: brew install flex bison openssl protobuf icu4c pkg-config + run: brew install flex bison openssl protobuf icu4c - name: Set pg 14 revision for caching id: pg_v14_rev From 46beecacce50bf1d113dbb6f31fe2283a598adf7 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Wed, 20 Nov 2024 12:23:41 +0000 Subject: [PATCH 19/22] CI(benchmarking): route test failures to on-call-qa-staging-stream (#9813) ## Problem We want to keep `#on-call-staging-stream` channel close to the prod one and redirect notifications from failing benchmarks to another channel for investigation. ## Summary of changes - Send notifications regarding failures in `benchmarking` job to `#on-call-staging-stream` - Send notifications regarding failures in `periodic_pagebench` job to `#on-call-staging-stream` --- .github/workflows/benchmarking.yml | 12 ++++++------ .github/workflows/periodic_pagebench.yml | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 0289f552f9..acea859b4d 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -158,7 +158,7 @@ jobs: if: ${{ github.event.schedule && failure() }} uses: slackapi/slack-github-action@v1 with: - channel-id: "C033QLM5P7D" # dev-staging-stream + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream slack-message: | Periodic perf testing: ${{ job.status }} <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> @@ -506,7 +506,7 @@ jobs: if: ${{ github.event.schedule && failure() }} uses: slackapi/slack-github-action@v1 with: - channel-id: "C033QLM5P7D" # dev-staging-stream + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream slack-message: | Periodic perf testing on ${{ matrix.platform }}: ${{ job.status }} <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> @@ -643,7 +643,7 @@ jobs: if: ${{ github.event.schedule && failure() }} uses: slackapi/slack-github-action@v1 with: - channel-id: "C033QLM5P7D" # dev-staging-stream + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream slack-message: | Periodic perf testing on ${{ env.PLATFORM }}: ${{ job.status }} <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> @@ -759,7 +759,7 @@ jobs: if: ${{ github.event.schedule && failure() }} uses: slackapi/slack-github-action@v1 with: - channel-id: "C033QLM5P7D" # dev-staging-stream + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream slack-message: | Periodic OLAP perf testing on ${{ matrix.platform }}: ${{ job.status }} <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> @@ -874,7 +874,7 @@ jobs: if: ${{ github.event.schedule && failure() }} uses: slackapi/slack-github-action@v1 with: - channel-id: "C033QLM5P7D" # dev-staging-stream + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream slack-message: | Periodic TPC-H perf testing on ${{ matrix.platform }}: ${{ job.status }} <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> @@ -974,7 +974,7 @@ jobs: if: ${{ github.event.schedule && failure() }} uses: slackapi/slack-github-action@v1 with: - channel-id: "C033QLM5P7D" # dev-staging-stream + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream slack-message: | Periodic TPC-H perf testing on ${{ matrix.platform }}: ${{ job.status }} <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> diff --git a/.github/workflows/periodic_pagebench.yml b/.github/workflows/periodic_pagebench.yml index 615937b5a1..1cce348ae2 100644 --- a/.github/workflows/periodic_pagebench.yml +++ b/.github/workflows/periodic_pagebench.yml @@ -72,7 +72,7 @@ jobs: echo "COMMIT_HASH=$INPUT_COMMIT_HASH" >> $GITHUB_ENV fi - - name: Start Bench with run_id + - name: Start Bench with run_id run: | curl -k -X 'POST' \ "${EC2_MACHINE_URL_US}/start_test/${GITHUB_RUN_ID}" \ @@ -116,7 +116,7 @@ jobs: -H 'accept: application/gzip' \ -H "Authorization: Bearer $API_KEY" \ --output "test_log_${GITHUB_RUN_ID}.gz" - + - name: Unzip Test Log and Print it into this job's log if: always() && steps.poll_step.outputs.too_many_runs != 'true' run: | @@ -134,13 +134,13 @@ jobs: if: ${{ github.event.schedule && failure() }} uses: slackapi/slack-github-action@v1 with: - channel-id: "C033QLM5P7D" # dev-staging-stream + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream slack-message: "Periodic pagebench testing on dedicated hardware: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" env: SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} - name: Cleanup Test Resources - if: always() + if: always() run: | curl -k -X 'POST' \ "${EC2_MACHINE_URL_US}/cleanup_test/${GITHUB_RUN_ID}" \ From 899933e159b56d8cfe92995befff8b37d6eb55b8 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Wed, 20 Nov 2024 12:48:21 +0000 Subject: [PATCH 20/22] scan_log_for_errors: check that regex is correct (#9815) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem I've noticed that we have 2 flaky tests which failed with error: ``` re.error: missing ), unterminated subpattern at position 21 ``` - `test_timeline_archival_chaos` — has been already fixed - `test_sharded_tad_interleaved_after_partial_success` — I didn't manage to find the incorrect regex [Internal link](https://neonprod.grafana.net/goto/yfmVHV7NR?orgId=1) ## Summary of changes - Wrap `re.match` in `try..except` block and print incorrect regex --- test_runner/fixtures/pageserver/allowed_errors.py | 10 ++++++++-- test_runner/fixtures/utils.py | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/test_runner/fixtures/pageserver/allowed_errors.py b/test_runner/fixtures/pageserver/allowed_errors.py index d05704c8e0..5059039678 100755 --- a/test_runner/fixtures/pageserver/allowed_errors.py +++ b/test_runner/fixtures/pageserver/allowed_errors.py @@ -25,8 +25,14 @@ def scan_pageserver_log_for_errors( # It's an ERROR or WARN. Is it in the allow-list? for a in allowed_errors: - if re.match(a, line): - break + try: + if re.match(a, line): + break + # We can switch `re.error` with `re.PatternError` after 3.13 + # https://docs.python.org/3/library/re.html#re.PatternError + except re.error: + print(f"Invalid regex: '{a}'", file=sys.stderr) + raise else: errors.append((lineno, line)) return errors diff --git a/test_runner/fixtures/utils.py b/test_runner/fixtures/utils.py index 96a651f0db..bb45385ea6 100644 --- a/test_runner/fixtures/utils.py +++ b/test_runner/fixtures/utils.py @@ -495,8 +495,14 @@ def scan_log_for_errors(input: Iterable[str], allowed_errors: list[str]) -> list # It's an ERROR or WARN. Is it in the allow-list? for a in allowed_errors: - if re.match(a, line): - break + try: + if re.match(a, line): + break + # We can switch `re.error` with `re.PatternError` after 3.13 + # https://docs.python.org/3/library/re.html#re.PatternError + except re.error: + log.error(f"Invalid regex: '{a}'") + raise else: errors.append((lineno, line)) return errors From bf7d859a8bdb26a6ac4ce1c17fec948d7bcecdcb Mon Sep 17 00:00:00 2001 From: Folke Behrens Date: Wed, 20 Nov 2024 13:50:36 +0100 Subject: [PATCH 21/22] proxy: Rename RequestMonitoring to RequestContext (#9805) ## Problem It is called context/ctx everywhere and the Monitoring suffix needlessly confuses with proper monitoring code. ## Summary of changes * Rename RequestMonitoring to RequestContext * Rename RequestMonitoringInner to RequestContextInner --- proxy/src/auth/backend/classic.rs | 4 +-- proxy/src/auth/backend/console_redirect.rs | 8 +++--- proxy/src/auth/backend/hacks.rs | 6 ++-- proxy/src/auth/backend/jwt.rs | 26 +++++++++--------- proxy/src/auth/backend/local.rs | 4 +-- proxy/src/auth/backend/mod.rs | 32 +++++++++++----------- proxy/src/auth/credentials.rs | 30 ++++++++++---------- proxy/src/auth/flow.rs | 4 +-- proxy/src/bin/pg_sni_router.rs | 8 +++--- proxy/src/cache/endpoints.rs | 4 +-- proxy/src/compute.rs | 4 +-- proxy/src/console_redirect_proxy.rs | 6 ++-- proxy/src/context/mod.rs | 22 +++++++-------- proxy/src/context/parquet.rs | 6 ++-- proxy/src/control_plane/client/mock.rs | 10 +++---- proxy/src/control_plane/client/mod.rs | 12 ++++---- proxy/src/control_plane/client/neon.rs | 16 +++++------ proxy/src/control_plane/mod.rs | 12 ++++---- proxy/src/proxy/connect_compute.rs | 10 +++---- proxy/src/proxy/handshake.rs | 4 +-- proxy/src/proxy/mod.rs | 6 ++-- proxy/src/proxy/tests/mitm.rs | 2 +- proxy/src/proxy/tests/mod.rs | 27 +++++++++--------- proxy/src/proxy/wake_compute.rs | 4 +-- proxy/src/serverless/backend.rs | 16 +++++------ proxy/src/serverless/conn_pool.rs | 4 +-- proxy/src/serverless/conn_pool_lib.rs | 4 +-- proxy/src/serverless/http_conn_pool.rs | 6 ++-- proxy/src/serverless/local_conn_pool.rs | 6 ++-- proxy/src/serverless/mod.rs | 6 ++-- proxy/src/serverless/sql_over_http.rs | 12 ++++---- proxy/src/serverless/websocket.rs | 4 +-- 32 files changed, 162 insertions(+), 163 deletions(-) diff --git a/proxy/src/auth/backend/classic.rs b/proxy/src/auth/backend/classic.rs index 87a02133c8..491b272ac4 100644 --- a/proxy/src/auth/backend/classic.rs +++ b/proxy/src/auth/backend/classic.rs @@ -5,13 +5,13 @@ use super::{ComputeCredentials, ComputeUserInfo}; use crate::auth::backend::ComputeCredentialKeys; use crate::auth::{self, AuthFlow}; use crate::config::AuthenticationConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::AuthSecret; use crate::stream::{PqStream, Stream}; use crate::{compute, sasl}; pub(super) async fn authenticate( - ctx: &RequestMonitoring, + ctx: &RequestContext, creds: ComputeUserInfo, client: &mut PqStream>, config: &'static AuthenticationConfig, diff --git a/proxy/src/auth/backend/console_redirect.rs b/proxy/src/auth/backend/console_redirect.rs index e25dc3d45e..5772471486 100644 --- a/proxy/src/auth/backend/console_redirect.rs +++ b/proxy/src/auth/backend/console_redirect.rs @@ -8,7 +8,7 @@ use tracing::{info, info_span}; use super::ComputeCredentialKeys; use crate::cache::Cached; use crate::config::AuthenticationConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::{self, CachedNodeInfo, NodeInfo}; use crate::error::{ReportableError, UserFacingError}; use crate::proxy::connect_compute::ComputeConnectBackend; @@ -71,7 +71,7 @@ impl ConsoleRedirectBackend { pub(crate) async fn authenticate( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, auth_config: &'static AuthenticationConfig, client: &mut PqStream, ) -> auth::Result { @@ -87,7 +87,7 @@ pub struct ConsoleRedirectNodeInfo(pub(super) NodeInfo); impl ComputeConnectBackend for ConsoleRedirectNodeInfo { async fn wake_compute( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, ) -> Result { Ok(Cached::new_uncached(self.0.clone())) } @@ -98,7 +98,7 @@ impl ComputeConnectBackend for ConsoleRedirectNodeInfo { } async fn authenticate( - ctx: &RequestMonitoring, + ctx: &RequestContext, auth_config: &'static AuthenticationConfig, link_uri: &reqwest::Url, client: &mut PqStream, diff --git a/proxy/src/auth/backend/hacks.rs b/proxy/src/auth/backend/hacks.rs index e651df1d34..3316543022 100644 --- a/proxy/src/auth/backend/hacks.rs +++ b/proxy/src/auth/backend/hacks.rs @@ -4,7 +4,7 @@ use tracing::{debug, info}; use super::{ComputeCredentials, ComputeUserInfo, ComputeUserInfoNoEndpoint}; use crate::auth::{self, AuthFlow}; use crate::config::AuthenticationConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::AuthSecret; use crate::intern::EndpointIdInt; use crate::sasl; @@ -15,7 +15,7 @@ use crate::stream::{self, Stream}; /// These properties are benefical for serverless JS workers, so we /// use this mechanism for websocket connections. pub(crate) async fn authenticate_cleartext( - ctx: &RequestMonitoring, + ctx: &RequestContext, info: ComputeUserInfo, client: &mut stream::PqStream>, secret: AuthSecret, @@ -57,7 +57,7 @@ pub(crate) async fn authenticate_cleartext( /// Similar to [`authenticate_cleartext`], but there's a specific password format, /// and passwords are not yet validated (we don't know how to validate them!) pub(crate) async fn password_hack_no_authentication( - ctx: &RequestMonitoring, + ctx: &RequestContext, info: ComputeUserInfoNoEndpoint, client: &mut stream::PqStream>, ) -> auth::Result<(ComputeUserInfo, Vec)> { diff --git a/proxy/src/auth/backend/jwt.rs b/proxy/src/auth/backend/jwt.rs index bfc674139b..f721d81aa2 100644 --- a/proxy/src/auth/backend/jwt.rs +++ b/proxy/src/auth/backend/jwt.rs @@ -17,7 +17,7 @@ use thiserror::Error; use tokio::time::Instant; use crate::auth::backend::ComputeCredentialKeys; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::errors::GetEndpointJwksError; use crate::http::read_body_with_limit; use crate::intern::RoleNameInt; @@ -39,7 +39,7 @@ const JWKS_FETCH_RETRIES: u32 = 3; pub(crate) trait FetchAuthRules: Clone + Send + Sync + 'static { fn fetch_auth_rules( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, endpoint: EndpointId, ) -> impl Future, FetchAuthRulesError>> + Send; } @@ -144,7 +144,7 @@ impl JwkCacheEntryLock { async fn renew_jwks( &self, _permit: JwkRenewalPermit<'_>, - ctx: &RequestMonitoring, + ctx: &RequestContext, client: &reqwest_middleware::ClientWithMiddleware, endpoint: EndpointId, auth_rules: &F, @@ -261,7 +261,7 @@ impl JwkCacheEntryLock { async fn get_or_update_jwk_cache( self: &Arc, - ctx: &RequestMonitoring, + ctx: &RequestContext, client: &reqwest_middleware::ClientWithMiddleware, endpoint: EndpointId, fetch: &F, @@ -314,7 +314,7 @@ impl JwkCacheEntryLock { async fn check_jwt( self: &Arc, - ctx: &RequestMonitoring, + ctx: &RequestContext, jwt: &str, client: &reqwest_middleware::ClientWithMiddleware, endpoint: EndpointId, @@ -409,7 +409,7 @@ impl JwkCacheEntryLock { impl JwkCache { pub(crate) async fn check_jwt( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, endpoint: EndpointId, role_name: &RoleName, fetch: &F, @@ -941,7 +941,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL impl FetchAuthRules for Fetch { async fn fetch_auth_rules( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _endpoint: EndpointId, ) -> Result, FetchAuthRulesError> { Ok(self.0.clone()) @@ -1039,7 +1039,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL for token in &tokens { jwk_cache .check_jwt( - &RequestMonitoring::test(), + &RequestContext::test(), endpoint.clone(), role, &fetch, @@ -1097,7 +1097,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL jwk_cache .check_jwt( - &RequestMonitoring::test(), + &RequestContext::test(), endpoint.clone(), &role_name, &fetch, @@ -1136,7 +1136,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL let ep = EndpointId::from("ep"); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let err = jwk_cache .check_jwt(&ctx, ep, &role, &fetch, &bad_jwt) .await @@ -1175,7 +1175,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL // this role_name is not accepted let bad_role_name = RoleName::from("cloud_admin"); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let err = jwk_cache .check_jwt(&ctx, ep, &bad_role_name, &fetch, &jwt) .await @@ -1268,7 +1268,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL let ep = EndpointId::from("ep"); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); for test in table { let jwt = new_custom_ec_jwt("1".into(), &key, test.body); @@ -1336,7 +1336,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL jwk_cache .check_jwt( - &RequestMonitoring::test(), + &RequestContext::test(), endpoint.clone(), &role_name, &fetch, diff --git a/proxy/src/auth/backend/local.rs b/proxy/src/auth/backend/local.rs index f9cb085daf..32e0f53615 100644 --- a/proxy/src/auth/backend/local.rs +++ b/proxy/src/auth/backend/local.rs @@ -7,7 +7,7 @@ use super::jwt::{AuthRule, FetchAuthRules}; use crate::auth::backend::jwt::FetchAuthRulesError; use crate::compute::ConnCfg; use crate::compute_ctl::ComputeCtlApi; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::messages::{ColdStartInfo, EndpointJwksResponse, MetricsAuxInfo}; use crate::control_plane::NodeInfo; use crate::http; @@ -56,7 +56,7 @@ pub static JWKS_ROLE_MAP: ArcSwapOption = ArcSwapOption::c impl FetchAuthRules for StaticAuthRules { async fn fetch_auth_rules( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _endpoint: EndpointId, ) -> Result, FetchAuthRulesError> { let mappings = JWKS_ROLE_MAP.load(); diff --git a/proxy/src/auth/backend/mod.rs b/proxy/src/auth/backend/mod.rs index 83c72e7be0..57ecd5e499 100644 --- a/proxy/src/auth/backend/mod.rs +++ b/proxy/src/auth/backend/mod.rs @@ -20,7 +20,7 @@ use crate::auth::credentials::check_peer_addr_is_in_list; use crate::auth::{self, validate_password_and_exchange, AuthError, ComputeUserInfoMaybeEndpoint}; use crate::cache::Cached; use crate::config::AuthenticationConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::client::ControlPlaneClient; use crate::control_plane::errors::GetAuthInfoError; use crate::control_plane::{ @@ -210,7 +210,7 @@ impl RateBucketInfo { impl AuthenticationConfig { pub(crate) fn check_rate_limit( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, secret: AuthSecret, endpoint: &EndpointId, is_cleartext: bool, @@ -265,7 +265,7 @@ impl AuthenticationConfig { /// /// All authentication flows will emit an AuthenticationOk message if successful. async fn auth_quirks( - ctx: &RequestMonitoring, + ctx: &RequestContext, api: &impl control_plane::ControlPlaneApi, user_info: ComputeUserInfoMaybeEndpoint, client: &mut stream::PqStream>, @@ -343,7 +343,7 @@ async fn auth_quirks( } async fn authenticate_with_secret( - ctx: &RequestMonitoring, + ctx: &RequestContext, secret: AuthSecret, info: ComputeUserInfo, client: &mut stream::PqStream>, @@ -396,7 +396,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> { #[tracing::instrument(fields(allow_cleartext = allow_cleartext), skip_all)] pub(crate) async fn authenticate( self, - ctx: &RequestMonitoring, + ctx: &RequestContext, client: &mut stream::PqStream>, allow_cleartext: bool, config: &'static AuthenticationConfig, @@ -436,7 +436,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> { impl Backend<'_, ComputeUserInfo> { pub(crate) async fn get_role_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, ) -> Result { match self { Self::ControlPlane(api, user_info) => api.get_role_secret(ctx, user_info).await, @@ -446,7 +446,7 @@ impl Backend<'_, ComputeUserInfo> { pub(crate) async fn get_allowed_ips_and_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, ) -> Result<(CachedAllowedIps, Option), GetAuthInfoError> { match self { Self::ControlPlane(api, user_info) => { @@ -461,7 +461,7 @@ impl Backend<'_, ComputeUserInfo> { impl ComputeConnectBackend for Backend<'_, ComputeCredentials> { async fn wake_compute( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, ) -> Result { match self { Self::ControlPlane(api, creds) => api.wake_compute(ctx, &creds.info).await, @@ -497,7 +497,7 @@ mod tests { use crate::auth::backend::MaskedIp; use crate::auth::{ComputeUserInfoMaybeEndpoint, IpPattern}; use crate::config::AuthenticationConfig; - use crate::context::RequestMonitoring; + use crate::context::RequestContext; use crate::control_plane::{self, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret}; use crate::proxy::NeonOptions; use crate::rate_limiter::{EndpointRateLimiter, RateBucketInfo}; @@ -513,7 +513,7 @@ mod tests { impl control_plane::ControlPlaneApi for Auth { async fn get_role_secret( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _user_info: &super::ComputeUserInfo, ) -> Result { Ok(CachedRoleSecret::new_uncached(Some(self.secret.clone()))) @@ -521,7 +521,7 @@ mod tests { async fn get_allowed_ips_and_secret( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _user_info: &super::ComputeUserInfo, ) -> Result< (CachedAllowedIps, Option), @@ -535,7 +535,7 @@ mod tests { async fn get_endpoint_jwks( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _endpoint: crate::types::EndpointId, ) -> Result, control_plane::errors::GetEndpointJwksError> { @@ -544,7 +544,7 @@ mod tests { async fn wake_compute( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _user_info: &super::ComputeUserInfo, ) -> Result { unimplemented!() @@ -623,7 +623,7 @@ mod tests { let (mut client, server) = tokio::io::duplex(1024); let mut stream = PqStream::new(Stream::from_raw(server)); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let api = Auth { ips: vec![], secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()), @@ -700,7 +700,7 @@ mod tests { let (mut client, server) = tokio::io::duplex(1024); let mut stream = PqStream::new(Stream::from_raw(server)); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let api = Auth { ips: vec![], secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()), @@ -752,7 +752,7 @@ mod tests { let (mut client, server) = tokio::io::duplex(1024); let mut stream = PqStream::new(Stream::from_raw(server)); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let api = Auth { ips: vec![], secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()), diff --git a/proxy/src/auth/credentials.rs b/proxy/src/auth/credentials.rs index dab9007400..f6bce9f2d8 100644 --- a/proxy/src/auth/credentials.rs +++ b/proxy/src/auth/credentials.rs @@ -10,7 +10,7 @@ use thiserror::Error; use tracing::{debug, warn}; use crate::auth::password_hack::parse_endpoint_param; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::error::{ReportableError, UserFacingError}; use crate::metrics::{Metrics, SniKind}; use crate::proxy::NeonOptions; @@ -86,7 +86,7 @@ pub(crate) fn endpoint_sni( impl ComputeUserInfoMaybeEndpoint { pub(crate) fn parse( - ctx: &RequestMonitoring, + ctx: &RequestContext, params: &StartupMessageParams, sni: Option<&str>, common_names: Option<&HashSet>, @@ -260,7 +260,7 @@ mod tests { fn parse_bare_minimum() -> anyhow::Result<()> { // According to postgresql, only `user` should be required. let options = StartupMessageParams::new([("user", "john_doe")]); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; assert_eq!(user_info.user, "john_doe"); assert_eq!(user_info.endpoint_id, None); @@ -275,7 +275,7 @@ mod tests { ("database", "world"), // should be ignored ("foo", "bar"), // should be ignored ]); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; assert_eq!(user_info.user, "john_doe"); assert_eq!(user_info.endpoint_id, None); @@ -290,7 +290,7 @@ mod tests { let sni = Some("foo.localhost"); let common_names = Some(["localhost".into()].into()); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?; assert_eq!(user_info.user, "john_doe"); @@ -307,7 +307,7 @@ mod tests { ("options", "-ckey=1 project=bar -c geqo=off"), ]); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; assert_eq!(user_info.user, "john_doe"); assert_eq!(user_info.endpoint_id.as_deref(), Some("bar")); @@ -322,7 +322,7 @@ mod tests { ("options", "-ckey=1 endpoint=bar -c geqo=off"), ]); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; assert_eq!(user_info.user, "john_doe"); assert_eq!(user_info.endpoint_id.as_deref(), Some("bar")); @@ -340,7 +340,7 @@ mod tests { ), ]); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; assert_eq!(user_info.user, "john_doe"); assert!(user_info.endpoint_id.is_none()); @@ -355,7 +355,7 @@ mod tests { ("options", "-ckey=1 endpoint=bar project=foo -c geqo=off"), ]); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; assert_eq!(user_info.user, "john_doe"); assert!(user_info.endpoint_id.is_none()); @@ -370,7 +370,7 @@ mod tests { let sni = Some("baz.localhost"); let common_names = Some(["localhost".into()].into()); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?; assert_eq!(user_info.user, "john_doe"); @@ -385,14 +385,14 @@ mod tests { let common_names = Some(["a.com".into(), "b.com".into()].into()); let sni = Some("p1.a.com"); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?; assert_eq!(user_info.endpoint_id.as_deref(), Some("p1")); let common_names = Some(["a.com".into(), "b.com".into()].into()); let sni = Some("p1.b.com"); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?; assert_eq!(user_info.endpoint_id.as_deref(), Some("p1")); @@ -408,7 +408,7 @@ mod tests { let sni = Some("second.localhost"); let common_names = Some(["localhost".into()].into()); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let err = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref()) .expect_err("should fail"); match err { @@ -427,7 +427,7 @@ mod tests { let sni = Some("project.localhost"); let common_names = Some(["example.com".into()].into()); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let err = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref()) .expect_err("should fail"); match err { @@ -447,7 +447,7 @@ mod tests { let sni = Some("project.localhost"); let common_names = Some(["localhost".into()].into()); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?; assert_eq!(user_info.endpoint_id.as_deref(), Some("project")); diff --git a/proxy/src/auth/flow.rs b/proxy/src/auth/flow.rs index 1740b59b14..9c6ce151cb 100644 --- a/proxy/src/auth/flow.rs +++ b/proxy/src/auth/flow.rs @@ -11,7 +11,7 @@ use tracing::info; use super::backend::ComputeCredentialKeys; use super::{AuthError, PasswordHackPayload}; use crate::config::TlsServerEndPoint; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::AuthSecret; use crate::intern::EndpointIdInt; use crate::sasl; @@ -32,7 +32,7 @@ pub(crate) struct Begin; /// Use [SCRAM](crate::scram)-based auth in [`AuthFlow`]. pub(crate) struct Scram<'a>( pub(crate) &'a scram::ServerSecret, - pub(crate) &'a RequestMonitoring, + pub(crate) &'a RequestContext, ); impl AuthMethod for Scram<'_> { diff --git a/proxy/src/bin/pg_sni_router.rs b/proxy/src/bin/pg_sni_router.rs index ef5b5e8509..623a0fd3b2 100644 --- a/proxy/src/bin/pg_sni_router.rs +++ b/proxy/src/bin/pg_sni_router.rs @@ -11,7 +11,7 @@ use futures::future::Either; use futures::TryFutureExt; use itertools::Itertools; use proxy::config::TlsServerEndPoint; -use proxy::context::RequestMonitoring; +use proxy::context::RequestContext; use proxy::metrics::{Metrics, ThreadPoolMetrics}; use proxy::protocol2::ConnectionInfo; use proxy::proxy::{copy_bidirectional_client_compute, run_until_cancelled, ErrorSource}; @@ -177,7 +177,7 @@ async fn task_main( .context("failed to set socket option")?; info!(%peer_addr, "serving"); - let ctx = RequestMonitoring::new( + let ctx = RequestContext::new( session_id, ConnectionInfo { addr: peer_addr, @@ -208,7 +208,7 @@ async fn task_main( const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)"; async fn ssl_handshake( - ctx: &RequestMonitoring, + ctx: &RequestContext, raw_stream: S, tls_config: Arc, tls_server_end_point: TlsServerEndPoint, @@ -259,7 +259,7 @@ async fn ssl_handshake( } async fn handle_client( - ctx: RequestMonitoring, + ctx: RequestContext, dest_suffix: Arc, tls_config: Arc, tls_server_end_point: TlsServerEndPoint, diff --git a/proxy/src/cache/endpoints.rs b/proxy/src/cache/endpoints.rs index 07769e053c..20db1fbb14 100644 --- a/proxy/src/cache/endpoints.rs +++ b/proxy/src/cache/endpoints.rs @@ -11,7 +11,7 @@ use tokio_util::sync::CancellationToken; use tracing::info; use crate::config::EndpointCacheConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::intern::{BranchIdInt, EndpointIdInt, ProjectIdInt}; use crate::metrics::{Metrics, RedisErrors, RedisEventsCount}; use crate::rate_limiter::GlobalRateLimiter; @@ -75,7 +75,7 @@ impl EndpointsCache { } } - pub(crate) fn is_valid(&self, ctx: &RequestMonitoring, endpoint: &EndpointId) -> bool { + pub(crate) fn is_valid(&self, ctx: &RequestContext, endpoint: &EndpointId) -> bool { if !self.ready.load(Ordering::Acquire) { // the endpoint cache is not yet fully initialised. return true; diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index b8876b44eb..e7fbe9ab47 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -18,7 +18,7 @@ use tracing::{debug, error, info, warn}; use crate::auth::parse_endpoint_param; use crate::cancellation::CancelClosure; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::client::ApiLockError; use crate::control_plane::errors::WakeComputeError; use crate::control_plane::messages::MetricsAuxInfo; @@ -286,7 +286,7 @@ impl ConnCfg { /// Connect to a corresponding compute node. pub(crate) async fn connect( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, allow_self_signed_compute: bool, aux: MetricsAuxInfo, timeout: Duration, diff --git a/proxy/src/console_redirect_proxy.rs b/proxy/src/console_redirect_proxy.rs index 8e71f552a5..c88b2936db 100644 --- a/proxy/src/console_redirect_proxy.rs +++ b/proxy/src/console_redirect_proxy.rs @@ -8,7 +8,7 @@ use tracing::{debug, error, info, Instrument}; use crate::auth::backend::ConsoleRedirectBackend; use crate::cancellation::{CancellationHandlerMain, CancellationHandlerMainInternal}; use crate::config::{ProxyConfig, ProxyProtocolV2}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::error::ReportableError; use crate::metrics::{Metrics, NumClientConnectionsGuard}; use crate::protocol2::{read_proxy_protocol, ConnectHeader, ConnectionInfo}; @@ -82,7 +82,7 @@ pub async fn task_main( } }; - let ctx = RequestMonitoring::new( + let ctx = RequestContext::new( session_id, peer_addr, crate::metrics::Protocol::Tcp, @@ -141,7 +141,7 @@ pub async fn task_main( pub(crate) async fn handle_client( config: &'static ProxyConfig, backend: &'static ConsoleRedirectBackend, - ctx: &RequestMonitoring, + ctx: &RequestContext, cancellation_handler: Arc, stream: S, conn_gauge: NumClientConnectionsGuard<'static>, diff --git a/proxy/src/context/mod.rs b/proxy/src/context/mod.rs index d057ee0bfd..6d2d2d51ce 100644 --- a/proxy/src/context/mod.rs +++ b/proxy/src/context/mod.rs @@ -32,15 +32,15 @@ pub(crate) static LOG_CHAN_DISCONNECT: OnceCell, + TryLock, ); -struct RequestMonitoringInner { +struct RequestContextInner { pub(crate) conn_info: ConnectionInfo, pub(crate) session_id: Uuid, pub(crate) protocol: Protocol, @@ -81,10 +81,10 @@ pub(crate) enum AuthMethod { Cleartext, } -impl Clone for RequestMonitoring { +impl Clone for RequestContext { fn clone(&self) -> Self { let inner = self.0.try_lock().expect("should not deadlock"); - let new = RequestMonitoringInner { + let new = RequestContextInner { conn_info: inner.conn_info.clone(), session_id: inner.session_id, protocol: inner.protocol, @@ -115,7 +115,7 @@ impl Clone for RequestMonitoring { } } -impl RequestMonitoring { +impl RequestContext { pub fn new( session_id: Uuid, conn_info: ConnectionInfo, @@ -132,7 +132,7 @@ impl RequestMonitoring { role = tracing::field::Empty, ); - let inner = RequestMonitoringInner { + let inner = RequestContextInner { conn_info, session_id, protocol, @@ -168,7 +168,7 @@ impl RequestMonitoring { let ip = IpAddr::from([127, 0, 0, 1]); let addr = SocketAddr::new(ip, 5432); let conn_info = ConnectionInfo { addr, extra: None }; - RequestMonitoring::new(Uuid::now_v7(), conn_info, Protocol::Tcp, "test") + RequestContext::new(Uuid::now_v7(), conn_info, Protocol::Tcp, "test") } pub(crate) fn console_application_name(&self) -> String { @@ -325,7 +325,7 @@ impl RequestMonitoring { } pub(crate) struct LatencyTimerPause<'a> { - ctx: &'a RequestMonitoring, + ctx: &'a RequestContext, start: tokio::time::Instant, waiting_for: Waiting, } @@ -341,7 +341,7 @@ impl Drop for LatencyTimerPause<'_> { } } -impl RequestMonitoringInner { +impl RequestContextInner { fn set_cold_start_info(&mut self, info: ColdStartInfo) { self.cold_start_info = info; self.latency_timer.cold_start_info(info); @@ -430,7 +430,7 @@ impl RequestMonitoringInner { } } -impl Drop for RequestMonitoringInner { +impl Drop for RequestContextInner { fn drop(&mut self) { if self.sender.is_some() { self.log_connect(); diff --git a/proxy/src/context/parquet.rs b/proxy/src/context/parquet.rs index 4112de646f..9bf3a275bb 100644 --- a/proxy/src/context/parquet.rs +++ b/proxy/src/context/parquet.rs @@ -20,7 +20,7 @@ use tokio_util::sync::CancellationToken; use tracing::{debug, info, Span}; use utils::backoff; -use super::{RequestMonitoringInner, LOG_CHAN}; +use super::{RequestContextInner, LOG_CHAN}; use crate::config::remote_storage_from_toml; use crate::context::LOG_CHAN_DISCONNECT; @@ -117,8 +117,8 @@ impl serde::Serialize for Options<'_> { } } -impl From<&RequestMonitoringInner> for RequestData { - fn from(value: &RequestMonitoringInner) -> Self { +impl From<&RequestContextInner> for RequestData { + fn from(value: &RequestContextInner) -> Self { Self { session_id: value.session_id, peer_addr: value.conn_info.addr.ip().to_string(), diff --git a/proxy/src/control_plane/client/mock.rs b/proxy/src/control_plane/client/mock.rs index fd333d2aac..500acad50f 100644 --- a/proxy/src/control_plane/client/mock.rs +++ b/proxy/src/control_plane/client/mock.rs @@ -13,7 +13,7 @@ use crate::auth::backend::jwt::AuthRule; use crate::auth::backend::ComputeUserInfo; use crate::auth::IpPattern; use crate::cache::Cached; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::client::{CachedAllowedIps, CachedRoleSecret}; use crate::control_plane::errors::{ ControlPlaneError, GetAuthInfoError, GetEndpointJwksError, WakeComputeError, @@ -206,7 +206,7 @@ impl super::ControlPlaneApi for MockControlPlane { #[tracing::instrument(skip_all)] async fn get_role_secret( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result { Ok(CachedRoleSecret::new_uncached( @@ -216,7 +216,7 @@ impl super::ControlPlaneApi for MockControlPlane { async fn get_allowed_ips_and_secret( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result<(CachedAllowedIps, Option), GetAuthInfoError> { Ok(( @@ -229,7 +229,7 @@ impl super::ControlPlaneApi for MockControlPlane { async fn get_endpoint_jwks( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, endpoint: EndpointId, ) -> Result, GetEndpointJwksError> { self.do_get_endpoint_jwks(endpoint).await @@ -238,7 +238,7 @@ impl super::ControlPlaneApi for MockControlPlane { #[tracing::instrument(skip_all)] async fn wake_compute( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _user_info: &ComputeUserInfo, ) -> Result { self.do_wake_compute().map_ok(Cached::new_uncached).await diff --git a/proxy/src/control_plane/client/mod.rs b/proxy/src/control_plane/client/mod.rs index 50903e2f1e..f8f74372f0 100644 --- a/proxy/src/control_plane/client/mod.rs +++ b/proxy/src/control_plane/client/mod.rs @@ -15,7 +15,7 @@ use crate::auth::backend::ComputeUserInfo; use crate::cache::endpoints::EndpointsCache; use crate::cache::project_info::ProjectInfoCacheImpl; use crate::config::{CacheOptions, EndpointCacheConfig, ProjectInfoCacheOptions}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::{ errors, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret, ControlPlaneApi, NodeInfoCache, }; @@ -41,7 +41,7 @@ pub enum ControlPlaneClient { impl ControlPlaneApi for ControlPlaneClient { async fn get_role_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result { match self { @@ -57,7 +57,7 @@ impl ControlPlaneApi for ControlPlaneClient { async fn get_allowed_ips_and_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result<(CachedAllowedIps, Option), errors::GetAuthInfoError> { match self { @@ -71,7 +71,7 @@ impl ControlPlaneApi for ControlPlaneClient { async fn get_endpoint_jwks( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, endpoint: EndpointId, ) -> Result, errors::GetEndpointJwksError> { match self { @@ -85,7 +85,7 @@ impl ControlPlaneApi for ControlPlaneClient { async fn wake_compute( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result { match self { @@ -271,7 +271,7 @@ impl WakeComputePermit { impl FetchAuthRules for ControlPlaneClient { async fn fetch_auth_rules( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, endpoint: EndpointId, ) -> Result, FetchAuthRulesError> { self.get_endpoint_jwks(ctx, endpoint) diff --git a/proxy/src/control_plane/client/neon.rs b/proxy/src/control_plane/client/neon.rs index 8f4ae13f33..53f9234926 100644 --- a/proxy/src/control_plane/client/neon.rs +++ b/proxy/src/control_plane/client/neon.rs @@ -14,7 +14,7 @@ use super::super::messages::{ControlPlaneErrorMessage, GetRoleSecret, WakeComput use crate::auth::backend::jwt::AuthRule; use crate::auth::backend::ComputeUserInfo; use crate::cache::Cached; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::caches::ApiCaches; use crate::control_plane::errors::{ ControlPlaneError, GetAuthInfoError, GetEndpointJwksError, WakeComputeError, @@ -65,7 +65,7 @@ impl NeonControlPlaneClient { async fn do_get_auth_info( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result { if !self @@ -141,7 +141,7 @@ impl NeonControlPlaneClient { async fn do_get_endpoint_jwks( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, endpoint: EndpointId, ) -> Result, GetEndpointJwksError> { if !self @@ -200,7 +200,7 @@ impl NeonControlPlaneClient { async fn do_wake_compute( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result { let request_id = ctx.session_id().to_string(); @@ -263,7 +263,7 @@ impl super::ControlPlaneApi for NeonControlPlaneClient { #[tracing::instrument(skip_all)] async fn get_role_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result { let normalized_ep = &user_info.endpoint.normalize(); @@ -297,7 +297,7 @@ impl super::ControlPlaneApi for NeonControlPlaneClient { async fn get_allowed_ips_and_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result<(CachedAllowedIps, Option), GetAuthInfoError> { let normalized_ep = &user_info.endpoint.normalize(); @@ -339,7 +339,7 @@ impl super::ControlPlaneApi for NeonControlPlaneClient { #[tracing::instrument(skip_all)] async fn get_endpoint_jwks( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, endpoint: EndpointId, ) -> Result, GetEndpointJwksError> { self.do_get_endpoint_jwks(ctx, endpoint).await @@ -348,7 +348,7 @@ impl super::ControlPlaneApi for NeonControlPlaneClient { #[tracing::instrument(skip_all)] async fn wake_compute( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result { let key = user_info.endpoint_cache_key(); diff --git a/proxy/src/control_plane/mod.rs b/proxy/src/control_plane/mod.rs index 70607ac0d0..41972e4e44 100644 --- a/proxy/src/control_plane/mod.rs +++ b/proxy/src/control_plane/mod.rs @@ -17,7 +17,7 @@ use crate::auth::backend::{ComputeCredentialKeys, ComputeUserInfo}; use crate::auth::IpPattern; use crate::cache::project_info::ProjectInfoCacheImpl; use crate::cache::{Cached, TimedLru}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::messages::{ControlPlaneErrorMessage, MetricsAuxInfo}; use crate::intern::ProjectIdInt; use crate::types::{EndpointCacheKey, EndpointId}; @@ -75,7 +75,7 @@ pub(crate) struct NodeInfo { impl NodeInfo { pub(crate) async fn connect( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, timeout: Duration, ) -> Result { self.config @@ -116,26 +116,26 @@ pub(crate) trait ControlPlaneApi { /// We still have to mock the scram to avoid leaking information that user doesn't exist. async fn get_role_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result; async fn get_allowed_ips_and_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result<(CachedAllowedIps, Option), errors::GetAuthInfoError>; async fn get_endpoint_jwks( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, endpoint: EndpointId, ) -> Result, errors::GetEndpointJwksError>; /// Wake up the compute node and return the corresponding connection info. async fn wake_compute( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result; } diff --git a/proxy/src/proxy/connect_compute.rs b/proxy/src/proxy/connect_compute.rs index 659b7afa68..b30aec09c1 100644 --- a/proxy/src/proxy/connect_compute.rs +++ b/proxy/src/proxy/connect_compute.rs @@ -7,7 +7,7 @@ use super::retry::ShouldRetryWakeCompute; use crate::auth::backend::ComputeCredentialKeys; use crate::compute::{self, PostgresConnection, COULD_NOT_CONNECT}; use crate::config::RetryConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::errors::WakeComputeError; use crate::control_plane::locks::ApiLocks; use crate::control_plane::{self, CachedNodeInfo, NodeInfo}; @@ -47,7 +47,7 @@ pub(crate) trait ConnectMechanism { type Error: From; async fn connect_once( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, node_info: &control_plane::CachedNodeInfo, timeout: time::Duration, ) -> Result; @@ -59,7 +59,7 @@ pub(crate) trait ConnectMechanism { pub(crate) trait ComputeConnectBackend { async fn wake_compute( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, ) -> Result; fn get_keys(&self) -> &ComputeCredentialKeys; @@ -82,7 +82,7 @@ impl ConnectMechanism for TcpMechanism<'_> { #[tracing::instrument(fields(pid = tracing::field::Empty), skip_all)] async fn connect_once( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, node_info: &control_plane::CachedNodeInfo, timeout: time::Duration, ) -> Result { @@ -99,7 +99,7 @@ impl ConnectMechanism for TcpMechanism<'_> { /// Try to connect to the compute node, retrying if necessary. #[tracing::instrument(skip_all)] pub(crate) async fn connect_to_compute( - ctx: &RequestMonitoring, + ctx: &RequestContext, mechanism: &M, user_info: &B, allow_self_signed_compute: bool, diff --git a/proxy/src/proxy/handshake.rs b/proxy/src/proxy/handshake.rs index a67f1b8112..3ada3a9995 100644 --- a/proxy/src/proxy/handshake.rs +++ b/proxy/src/proxy/handshake.rs @@ -9,7 +9,7 @@ use tracing::{info, warn}; use crate::auth::endpoint_sni; use crate::config::{TlsConfig, PG_ALPN_PROTOCOL}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::error::ReportableError; use crate::metrics::Metrics; use crate::proxy::ERR_INSECURE_CONNECTION; @@ -66,7 +66,7 @@ pub(crate) enum HandshakeData { /// we also take an extra care of propagating only the select handshake errors to client. #[tracing::instrument(skip_all)] pub(crate) async fn handshake( - ctx: &RequestMonitoring, + ctx: &RequestContext, stream: S, mut tls: Option<&TlsConfig>, record_handshake_error: bool, diff --git a/proxy/src/proxy/mod.rs b/proxy/src/proxy/mod.rs index 17721c23d5..4be4006d15 100644 --- a/proxy/src/proxy/mod.rs +++ b/proxy/src/proxy/mod.rs @@ -25,7 +25,7 @@ use self::connect_compute::{connect_to_compute, TcpMechanism}; use self::passthrough::ProxyPassthrough; use crate::cancellation::{self, CancellationHandlerMain, CancellationHandlerMainInternal}; use crate::config::{ProxyConfig, ProxyProtocolV2, TlsConfig}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::error::ReportableError; use crate::metrics::{Metrics, NumClientConnectionsGuard}; use crate::protocol2::{read_proxy_protocol, ConnectHeader, ConnectionInfo}; @@ -117,7 +117,7 @@ pub async fn task_main( } }; - let ctx = RequestMonitoring::new( + let ctx = RequestContext::new( session_id, conn_info, crate::metrics::Protocol::Tcp, @@ -247,7 +247,7 @@ impl ReportableError for ClientRequestError { pub(crate) async fn handle_client( config: &'static ProxyConfig, auth_backend: &'static auth::Backend<'static, ()>, - ctx: &RequestMonitoring, + ctx: &RequestContext, cancellation_handler: Arc, stream: S, mode: ClientMode, diff --git a/proxy/src/proxy/tests/mitm.rs b/proxy/src/proxy/tests/mitm.rs index df9f79a7e3..fe211adfeb 100644 --- a/proxy/src/proxy/tests/mitm.rs +++ b/proxy/src/proxy/tests/mitm.rs @@ -36,7 +36,7 @@ async fn proxy_mitm( // begin handshake with end_server let end_server = connect_tls(server2, client_config2.make_tls_connect().unwrap()).await; let (end_client, startup) = match handshake( - &RequestMonitoring::test(), + &RequestContext::test(), client1, Some(&server_config1), false, diff --git a/proxy/src/proxy/tests/mod.rs b/proxy/src/proxy/tests/mod.rs index be821925b5..3de8ca8736 100644 --- a/proxy/src/proxy/tests/mod.rs +++ b/proxy/src/proxy/tests/mod.rs @@ -162,7 +162,7 @@ impl TestAuth for Scram { stream: &mut PqStream>, ) -> anyhow::Result<()> { let outcome = auth::AuthFlow::new(stream) - .begin(auth::Scram(&self.0, &RequestMonitoring::test())) + .begin(auth::Scram(&self.0, &RequestContext::test())) .await? .authenticate() .await?; @@ -182,11 +182,10 @@ async fn dummy_proxy( auth: impl TestAuth + Send, ) -> anyhow::Result<()> { let (client, _) = read_proxy_protocol(client).await?; - let mut stream = - match handshake(&RequestMonitoring::test(), client, tls.as_ref(), false).await? { - HandshakeData::Startup(stream, _) => stream, - HandshakeData::Cancel(_) => bail!("cancellation not supported"), - }; + let mut stream = match handshake(&RequestContext::test(), client, tls.as_ref(), false).await? { + HandshakeData::Startup(stream, _) => stream, + HandshakeData::Cancel(_) => bail!("cancellation not supported"), + }; auth.authenticate(&mut stream).await?; @@ -466,7 +465,7 @@ impl ConnectMechanism for TestConnectMechanism { async fn connect_once( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _node_info: &control_plane::CachedNodeInfo, _timeout: std::time::Duration, ) -> Result { @@ -581,7 +580,7 @@ fn helper_create_connect_info( async fn connect_to_compute_success() { let _ = env_logger::try_init(); use ConnectAction::*; - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![Wake, Connect]); let user_info = helper_create_connect_info(&mechanism); let config = RetryConfig { @@ -599,7 +598,7 @@ async fn connect_to_compute_success() { async fn connect_to_compute_retry() { let _ = env_logger::try_init(); use ConnectAction::*; - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Connect]); let user_info = helper_create_connect_info(&mechanism); let config = RetryConfig { @@ -618,7 +617,7 @@ async fn connect_to_compute_retry() { async fn connect_to_compute_non_retry_1() { let _ = env_logger::try_init(); use ConnectAction::*; - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Fail]); let user_info = helper_create_connect_info(&mechanism); let config = RetryConfig { @@ -637,7 +636,7 @@ async fn connect_to_compute_non_retry_1() { async fn connect_to_compute_non_retry_2() { let _ = env_logger::try_init(); use ConnectAction::*; - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![Wake, Fail, Wake, Connect]); let user_info = helper_create_connect_info(&mechanism); let config = RetryConfig { @@ -657,7 +656,7 @@ async fn connect_to_compute_non_retry_3() { let _ = env_logger::try_init(); tokio::time::pause(); use ConnectAction::*; - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Retry, Retry, Retry, Retry, Retry]); let user_info = helper_create_connect_info(&mechanism); @@ -689,7 +688,7 @@ async fn connect_to_compute_non_retry_3() { async fn wake_retry() { let _ = env_logger::try_init(); use ConnectAction::*; - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![WakeRetry, Wake, Connect]); let user_info = helper_create_connect_info(&mechanism); let config = RetryConfig { @@ -708,7 +707,7 @@ async fn wake_retry() { async fn wake_non_retry() { let _ = env_logger::try_init(); use ConnectAction::*; - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![WakeRetry, WakeFail]); let user_info = helper_create_connect_info(&mechanism); let config = RetryConfig { diff --git a/proxy/src/proxy/wake_compute.rs b/proxy/src/proxy/wake_compute.rs index f9f46bb66c..d09e0b1f41 100644 --- a/proxy/src/proxy/wake_compute.rs +++ b/proxy/src/proxy/wake_compute.rs @@ -2,7 +2,7 @@ use tracing::{error, info, warn}; use super::connect_compute::ComputeConnectBackend; use crate::config::RetryConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::errors::WakeComputeError; use crate::control_plane::CachedNodeInfo; use crate::error::ReportableError; @@ -13,7 +13,7 @@ use crate::proxy::retry::{retry_after, should_retry}; pub(crate) async fn wake_compute( num_retries: &mut u32, - ctx: &RequestMonitoring, + ctx: &RequestContext, api: &B, config: RetryConfig, ) -> Result { diff --git a/proxy/src/serverless/backend.rs b/proxy/src/serverless/backend.rs index 5e9fd151ae..d9dcf6fbb7 100644 --- a/proxy/src/serverless/backend.rs +++ b/proxy/src/serverless/backend.rs @@ -23,7 +23,7 @@ use crate::compute_ctl::{ ComputeCtlError, ExtensionInstallRequest, Privilege, SetRoleGrantsRequest, }; use crate::config::ProxyConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::client::ApiLockError; use crate::control_plane::errors::{GetAuthInfoError, WakeComputeError}; use crate::control_plane::locks::ApiLocks; @@ -48,7 +48,7 @@ pub(crate) struct PoolingBackend { impl PoolingBackend { pub(crate) async fn authenticate_with_password( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, password: &[u8], ) -> Result { @@ -110,7 +110,7 @@ impl PoolingBackend { pub(crate) async fn authenticate_with_jwt( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, jwt: String, ) -> Result { @@ -161,7 +161,7 @@ impl PoolingBackend { #[tracing::instrument(fields(pid = tracing::field::Empty), skip_all)] pub(crate) async fn connect_to_compute( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: ConnInfo, keys: ComputeCredentials, force_new: bool, @@ -201,7 +201,7 @@ impl PoolingBackend { #[tracing::instrument(fields(pid = tracing::field::Empty), skip_all)] pub(crate) async fn connect_to_local_proxy( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: ConnInfo, ) -> Result, HttpConnError> { info!("pool: looking for an existing connection"); @@ -249,7 +249,7 @@ impl PoolingBackend { #[tracing::instrument(fields(pid = tracing::field::Empty), skip_all)] pub(crate) async fn connect_to_local_postgres( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: ConnInfo, ) -> Result, HttpConnError> { if let Some(client) = self.local_pool.get(ctx, &conn_info)? { @@ -490,7 +490,7 @@ impl ConnectMechanism for TokioMechanism { async fn connect_once( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, node_info: &CachedNodeInfo, timeout: Duration, ) -> Result { @@ -540,7 +540,7 @@ impl ConnectMechanism for HyperMechanism { async fn connect_once( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, node_info: &CachedNodeInfo, timeout: Duration, ) -> Result { diff --git a/proxy/src/serverless/conn_pool.rs b/proxy/src/serverless/conn_pool.rs index 1845603bf7..07ba1ae9af 100644 --- a/proxy/src/serverless/conn_pool.rs +++ b/proxy/src/serverless/conn_pool.rs @@ -21,7 +21,7 @@ use { use super::conn_pool_lib::{ Client, ClientDataEnum, ClientInnerCommon, ClientInnerExt, ConnInfo, GlobalConnPool, }; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::messages::MetricsAuxInfo; use crate::metrics::Metrics; @@ -53,7 +53,7 @@ impl fmt::Display for ConnInfo { pub(crate) fn poll_client( global_pool: Arc>, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: ConnInfo, client: C, mut connection: tokio_postgres::Connection, diff --git a/proxy/src/serverless/conn_pool_lib.rs b/proxy/src/serverless/conn_pool_lib.rs index 61c39c32c9..fe3c422c3b 100644 --- a/proxy/src/serverless/conn_pool_lib.rs +++ b/proxy/src/serverless/conn_pool_lib.rs @@ -15,7 +15,7 @@ use super::conn_pool::ClientDataRemote; use super::http_conn_pool::ClientDataHttp; use super::local_conn_pool::ClientDataLocal; use crate::auth::backend::ComputeUserInfo; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo}; use crate::metrics::{HttpEndpointPoolsGuard, Metrics}; use crate::types::{DbName, EndpointCacheKey, RoleName}; @@ -380,7 +380,7 @@ impl GlobalConnPool { pub(crate) fn get( self: &Arc, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: &ConnInfo, ) -> Result>, HttpConnError> { let mut client: Option> = None; diff --git a/proxy/src/serverless/http_conn_pool.rs b/proxy/src/serverless/http_conn_pool.rs index a1d4473b01..bc86c4b1cd 100644 --- a/proxy/src/serverless/http_conn_pool.rs +++ b/proxy/src/serverless/http_conn_pool.rs @@ -12,7 +12,7 @@ use tracing::{debug, error, info, info_span, Instrument}; use super::backend::HttpConnError; use super::conn_pool_lib::{ClientInnerExt, ConnInfo}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo}; use crate::metrics::{HttpEndpointPoolsGuard, Metrics}; use crate::types::EndpointCacheKey; @@ -212,7 +212,7 @@ impl GlobalConnPool { #[expect(unused_results)] pub(crate) fn get( self: &Arc, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: &ConnInfo, ) -> Result>, HttpConnError> { let result: Result>, HttpConnError>; @@ -280,7 +280,7 @@ impl GlobalConnPool { pub(crate) fn poll_http2_client( global_pool: Arc>, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: &ConnInfo, client: Send, connection: Connect, diff --git a/proxy/src/serverless/local_conn_pool.rs b/proxy/src/serverless/local_conn_pool.rs index 99d4329f88..cadcbd7530 100644 --- a/proxy/src/serverless/local_conn_pool.rs +++ b/proxy/src/serverless/local_conn_pool.rs @@ -36,7 +36,7 @@ use super::conn_pool_lib::{ Client, ClientDataEnum, ClientInnerCommon, ClientInnerExt, ConnInfo, DbUserConn, EndpointConnPool, }; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo}; use crate::metrics::Metrics; @@ -88,7 +88,7 @@ impl LocalConnPool { pub(crate) fn get( self: &Arc, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: &ConnInfo, ) -> Result>, HttpConnError> { let client = self @@ -159,7 +159,7 @@ impl LocalConnPool { #[allow(clippy::too_many_arguments)] pub(crate) fn poll_client( global_pool: Arc>, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: ConnInfo, client: C, mut connection: tokio_postgres::Connection, diff --git a/proxy/src/serverless/mod.rs b/proxy/src/serverless/mod.rs index cf758855fa..59247f03bf 100644 --- a/proxy/src/serverless/mod.rs +++ b/proxy/src/serverless/mod.rs @@ -45,7 +45,7 @@ use utils::http::error::ApiError; use crate::cancellation::CancellationHandlerMain; use crate::config::{ProxyConfig, ProxyProtocolV2}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::metrics::Metrics; use crate::protocol2::{read_proxy_protocol, ChainRW, ConnectHeader, ConnectionInfo}; use crate::proxy::run_until_cancelled; @@ -423,7 +423,7 @@ async fn request_handler( if config.http_config.accept_websockets && framed_websockets::upgrade::is_upgrade_request(&request) { - let ctx = RequestMonitoring::new( + let ctx = RequestContext::new( session_id, conn_info, crate::metrics::Protocol::Ws, @@ -458,7 +458,7 @@ async fn request_handler( // Return the response so the spawned future can continue. Ok(response.map(|b| b.map_err(|x| match x {}).boxed())) } else if request.uri().path() == "/sql" && *request.method() == Method::POST { - let ctx = RequestMonitoring::new( + let ctx = RequestContext::new( session_id, conn_info, crate::metrics::Protocol::Http, diff --git a/proxy/src/serverless/sql_over_http.rs b/proxy/src/serverless/sql_over_http.rs index f0975617d4..36d8595902 100644 --- a/proxy/src/serverless/sql_over_http.rs +++ b/proxy/src/serverless/sql_over_http.rs @@ -34,7 +34,7 @@ use super::json::{json_to_pg_text, pg_text_row_to_json, JsonConversionError}; use crate::auth::backend::{ComputeCredentialKeys, ComputeUserInfo}; use crate::auth::{endpoint_sni, ComputeUserInfoParseError}; use crate::config::{AuthenticationConfig, HttpConfig, ProxyConfig, TlsConfig}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::error::{ErrorKind, ReportableError, UserFacingError}; use crate::metrics::{HttpDirection, Metrics}; use crate::proxy::{run_until_cancelled, NeonOptions}; @@ -133,7 +133,7 @@ impl UserFacingError for ConnInfoError { fn get_conn_info( config: &'static AuthenticationConfig, - ctx: &RequestMonitoring, + ctx: &RequestContext, headers: &HeaderMap, tls: Option<&TlsConfig>, ) -> Result { @@ -240,7 +240,7 @@ fn get_conn_info( pub(crate) async fn handle( config: &'static ProxyConfig, - ctx: RequestMonitoring, + ctx: RequestContext, request: Request, backend: Arc, cancel: CancellationToken, @@ -516,7 +516,7 @@ fn map_isolation_level_to_headers(level: IsolationLevel) -> Option async fn handle_inner( cancel: CancellationToken, config: &'static ProxyConfig, - ctx: &RequestMonitoring, + ctx: &RequestContext, request: Request, backend: Arc, ) -> Result>, SqlOverHttpError> { @@ -562,7 +562,7 @@ async fn handle_inner( async fn handle_db_inner( cancel: CancellationToken, config: &'static ProxyConfig, - ctx: &RequestMonitoring, + ctx: &RequestContext, request: Request, conn_info: ConnInfo, auth: AuthData, @@ -733,7 +733,7 @@ pub(crate) fn uuid_to_header_value(id: Uuid) -> HeaderValue { } async fn handle_auth_broker_inner( - ctx: &RequestMonitoring, + ctx: &RequestContext, request: Request, conn_info: ConnInfo, jwt: String, diff --git a/proxy/src/serverless/websocket.rs b/proxy/src/serverless/websocket.rs index ba36116c2c..4088fea835 100644 --- a/proxy/src/serverless/websocket.rs +++ b/proxy/src/serverless/websocket.rs @@ -14,7 +14,7 @@ use tracing::warn; use crate::cancellation::CancellationHandlerMain; use crate::config::ProxyConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::error::{io_error, ReportableError}; use crate::metrics::Metrics; use crate::proxy::{handle_client, ClientMode, ErrorSource}; @@ -126,7 +126,7 @@ impl AsyncBufRead for WebSocketRw { pub(crate) async fn serve_websocket( config: &'static ProxyConfig, auth_backend: &'static crate::auth::Backend<'static, ()>, - ctx: RequestMonitoring, + ctx: RequestContext, websocket: OnUpgrade, cancellation_handler: Arc, endpoint_rate_limiter: Arc, From b695907752b2bf07a628413824a102eca04e33d1 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Mon, 18 Nov 2024 20:25:44 +0100 Subject: [PATCH 22/22] page_service: add benchmark for batching This PR adds a benchmark to demonstrate the effect of server-side getpage request batching added in https://github.com/neondatabase/neon/pull/9321. Refs: - Epic: https://github.com/neondatabase/neon/issues/9376 - Extracted from https://github.com/neondatabase/neon/pull/9792 --- test_runner/performance/README.md | 3 +- .../test_pageserver_getpage_merge.py | 197 ++++++++++++++++++ 2 files changed, 199 insertions(+), 1 deletion(-) create mode 100644 test_runner/performance/pageserver/test_pageserver_getpage_merge.py diff --git a/test_runner/performance/README.md b/test_runner/performance/README.md index 70d75a6dcf..85096d3770 100644 --- a/test_runner/performance/README.md +++ b/test_runner/performance/README.md @@ -15,6 +15,7 @@ Some handy pytest flags for local development: - `-k` selects a test to run - `--timeout=0` disables our default timeout of 300s (see `setup.cfg`) - `--preserve-database-files` to skip cleanup +- `--out-dir` to produce a JSON with the recorded test metrics # What performance tests do we have and how we run them @@ -36,6 +37,6 @@ All tests run only once. Usually to obtain more consistent performance numbers, ## Results collection -Local test results for main branch, and results of daily performance tests, are stored in a neon project deployed in production environment. There is a Grafana dashboard that visualizes the results. Here is the [dashboard](https://observer.zenith.tech/d/DGKBm9Jnz/perf-test-results?orgId=1). The main problem with it is the unavailability to point at particular commit, though the data for that is available in the database. Needs some tweaking from someone who knows Grafana tricks. +Local test results for main branch, and results of daily performance tests, are stored in a [neon project](https://console.neon.tech/app/projects/withered-sky-69117821) deployed in production environment. There is a Grafana dashboard that visualizes the results. Here is the [dashboard](https://observer.zenith.tech/d/DGKBm9Jnz/perf-test-results?orgId=1). The main problem with it is the unavailability to point at particular commit, though the data for that is available in the database. Needs some tweaking from someone who knows Grafana tricks. There is also an inconsistency in test naming. Test name should be the same across platforms, and results can be differentiated by the platform field. But currently, platform is sometimes included in test name because of the way how parametrization works in pytest. I.e. there is a platform switch in the dashboard with neon-local-ci and neon-staging variants. I.e. some tests under neon-local-ci value for a platform switch are displayed as `Test test_runner/performance/test_bulk_insert.py::test_bulk_insert[vanilla]` and `Test test_runner/performance/test_bulk_insert.py::test_bulk_insert[neon]` which is highly confusing. diff --git a/test_runner/performance/pageserver/test_pageserver_getpage_merge.py b/test_runner/performance/pageserver/test_pageserver_getpage_merge.py new file mode 100644 index 0000000000..be7cf66c79 --- /dev/null +++ b/test_runner/performance/pageserver/test_pageserver_getpage_merge.py @@ -0,0 +1,197 @@ +import dataclasses +import time +from dataclasses import dataclass +from typing import Any, Optional + +import pytest +from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker +from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnvBuilder +from fixtures.utils import humantime_to_ms + +TARGET_RUNTIME = 60 + + +@pytest.mark.parametrize( + "tablesize_mib, batch_timeout, target_runtime, effective_io_concurrency, readhead_buffer_size, name", + [ + # the next 4 cases demonstrate how not-batchable workloads suffer from batching timeout + (50, None, TARGET_RUNTIME, 1, 128, "not batchable no batching"), + (50, "10us", TARGET_RUNTIME, 1, 128, "not batchable 10us timeout"), + (50, "1ms", TARGET_RUNTIME, 1, 128, "not batchable 1ms timeout"), + # the next 4 cases demonstrate how batchable workloads benefit from batching + (50, None, TARGET_RUNTIME, 100, 128, "batchable no batching"), + (50, "10us", TARGET_RUNTIME, 100, 128, "batchable 10us timeout"), + (50, "100us", TARGET_RUNTIME, 100, 128, "batchable 100us timeout"), + (50, "1ms", TARGET_RUNTIME, 100, 128, "batchable 1ms timeout"), + ], +) +def test_getpage_merge_smoke( + neon_env_builder: NeonEnvBuilder, + zenbenchmark: NeonBenchmarker, + tablesize_mib: int, + batch_timeout: Optional[str], + target_runtime: int, + effective_io_concurrency: int, + readhead_buffer_size: int, + name: str, +): + """ + Do a bunch of sequential scans and ensure that the pageserver does some merging. + """ + + # + # record perf-related parameters as metrics to simplify processing of results + # + params: dict[str, tuple[float | int, dict[str, Any]]] = {} + + params.update( + { + "tablesize_mib": (tablesize_mib, {"unit": "MiB"}), + "batch_timeout": ( + -1 if batch_timeout is None else 1e3 * humantime_to_ms(batch_timeout), + {"unit": "us"}, + ), + # target_runtime is just a polite ask to the workload to run for this long + "effective_io_concurrency": (effective_io_concurrency, {}), + "readhead_buffer_size": (readhead_buffer_size, {}), + # name is not a metric + } + ) + + log.info("params: %s", params) + + for param, (value, kwargs) in params.items(): + zenbenchmark.record( + param, + metric_value=value, + unit=kwargs.pop("unit", ""), + report=MetricReport.TEST_PARAM, + **kwargs, + ) + + # + # Setup + # + + env = neon_env_builder.init_start() + ps_http = env.pageserver.http_client() + endpoint = env.endpoints.create_start("main") + conn = endpoint.connect() + cur = conn.cursor() + + cur.execute("SET max_parallel_workers_per_gather=0") # disable parallel backends + cur.execute(f"SET effective_io_concurrency={effective_io_concurrency}") + cur.execute( + f"SET neon.readahead_buffer_size={readhead_buffer_size}" + ) # this is the current default value, but let's hard-code that + + cur.execute("CREATE EXTENSION IF NOT EXISTS neon;") + cur.execute("CREATE EXTENSION IF NOT EXISTS neon_test_utils;") + + log.info("Filling the table") + cur.execute("CREATE TABLE t (data char(1000)) with (fillfactor=10)") + tablesize = tablesize_mib * 1024 * 1024 + npages = tablesize // (8 * 1024) + cur.execute("INSERT INTO t SELECT generate_series(1, %s)", (npages,)) + # TODO: can we force postgres to do sequential scans? + + # + # Run the workload, collect `Metrics` before and after, calculate difference, normalize. + # + + @dataclass + class Metrics: + time: float + pageserver_getpage_count: float + pageserver_vectored_get_count: float + compute_getpage_count: float + pageserver_cpu_seconds_total: float + + def __sub__(self, other: "Metrics") -> "Metrics": + return Metrics( + time=self.time - other.time, + pageserver_getpage_count=self.pageserver_getpage_count + - other.pageserver_getpage_count, + pageserver_vectored_get_count=self.pageserver_vectored_get_count + - other.pageserver_vectored_get_count, + compute_getpage_count=self.compute_getpage_count - other.compute_getpage_count, + pageserver_cpu_seconds_total=self.pageserver_cpu_seconds_total + - other.pageserver_cpu_seconds_total, + ) + + def normalize(self, by) -> "Metrics": + return Metrics( + time=self.time / by, + pageserver_getpage_count=self.pageserver_getpage_count / by, + pageserver_vectored_get_count=self.pageserver_vectored_get_count / by, + compute_getpage_count=self.compute_getpage_count / by, + pageserver_cpu_seconds_total=self.pageserver_cpu_seconds_total / by, + ) + + def get_metrics() -> Metrics: + with conn.cursor() as cur: + cur.execute( + "select value from neon_perf_counters where metric='getpage_wait_seconds_count';" + ) + compute_getpage_count = cur.fetchall()[0][0] + pageserver_metrics = ps_http.get_metrics() + return Metrics( + time=time.time(), + pageserver_getpage_count=pageserver_metrics.query_one( + "pageserver_smgr_query_seconds_count", {"smgr_query_type": "get_page_at_lsn"} + ).value, + pageserver_vectored_get_count=pageserver_metrics.query_one( + "pageserver_get_vectored_seconds_count", {"task_kind": "PageRequestHandler"} + ).value, + compute_getpage_count=compute_getpage_count, + pageserver_cpu_seconds_total=pageserver_metrics.query_one( + "process_cpu_seconds_total" + ).value, + ) + + def workload() -> Metrics: + start = time.time() + iters = 0 + while time.time() - start < target_runtime or iters < 2: + log.info("Seqscan %d", iters) + if iters == 1: + # round zero for warming up + before = get_metrics() + cur.execute( + "select clear_buffer_cache()" + ) # TODO: what about LFC? doesn't matter right now because LFC isn't enabled by default in tests + cur.execute("select sum(data::bigint) from t") + assert cur.fetchall()[0][0] == npages * (npages + 1) // 2 + iters += 1 + after = get_metrics() + return (after - before).normalize(iters - 1) + + env.pageserver.patch_config_toml_nonrecursive({"server_side_batch_timeout": batch_timeout}) + env.pageserver.restart() + metrics = workload() + + log.info("Results: %s", metrics) + + # + # Sanity-checks on the collected data + # + def close_enough(a, b): + return (a / b > 0.99 and a / b < 1.01) and (b / a > 0.99 and b / a < 1.01) + + # assert that getpage counts roughly match between compute and ps + assert close_enough(metrics.pageserver_getpage_count, metrics.compute_getpage_count) + + # + # Record the results + # + + for metric, value in dataclasses.asdict(metrics).items(): + zenbenchmark.record(f"counters.{metric}", value, unit="", report=MetricReport.TEST_PARAM) + + zenbenchmark.record( + "perfmetric.batching_factor", + metrics.pageserver_getpage_count / metrics.pageserver_vectored_get_count, + unit="", + report=MetricReport.HIGHER_IS_BETTER, + )