From 2451969d5c533ab4ca085cdce12fdd79d55cca8b Mon Sep 17 00:00:00 2001 From: Rahul Patil Date: Fri, 13 Dec 2024 13:22:15 +0100 Subject: [PATCH 01/89] fix(ci): Allow github-action-script to post reports (#10136) Allow github-action-script to post reports. Failed CI: https://github.com/neondatabase/neon/actions/runs/12304655364/job/34342554049#step:13:514 --- .github/workflows/build_and_test.yml | 15 +++++++++++++++ .github/workflows/periodic_pagebench.yml | 8 +++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index a3943cba91..b3556debe3 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -303,6 +303,11 @@ jobs: benchmarks: if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks') needs: [ check-permissions, build-and-test-locally, build-build-tools-image, get-benchmarks-durations ] + permissions: + id-token: write # aws-actions/configure-aws-credentials + statuses: write + contents: write + pull-requests: write runs-on: [ self-hosted, small ] container: image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm @@ -343,6 +348,11 @@ jobs: report-benchmarks-failures: needs: [ benchmarks, create-test-report ] if: github.ref_name == 'main' && failure() && needs.benchmarks.result == 'failure' + permissions: + id-token: write # aws-actions/configure-aws-credentials + statuses: write + contents: write + pull-requests: write runs-on: ubuntu-22.04 steps: @@ -1024,6 +1034,11 @@ jobs: trigger-custom-extensions-build-and-wait: needs: [ check-permissions, tag ] runs-on: ubuntu-22.04 + permissions: + id-token: write # aws-actions/configure-aws-credentials + statuses: write + contents: write + pull-requests: write steps: - name: Set PR's status to pending and request a remote CI test run: | diff --git a/.github/workflows/periodic_pagebench.yml b/.github/workflows/periodic_pagebench.yml index 9f5a16feca..049990f17b 100644 --- a/.github/workflows/periodic_pagebench.yml +++ b/.github/workflows/periodic_pagebench.yml @@ -21,15 +21,17 @@ defaults: run: shell: bash -euo pipefail {0} -permissions: - id-token: write # aws-actions/configure-aws-credentials - concurrency: group: ${{ github.workflow }} cancel-in-progress: false jobs: trigger_bench_on_ec2_machine_in_eu_central_1: + permissions: + id-token: write # aws-actions/configure-aws-credentials + statuses: write + contents: write + pull-requests: write runs-on: [ self-hosted, small ] container: image: neondatabase/build-tools:pinned-bookworm From 7dc382601cb9f2c508f11acdac897a8a0bbbdce3 Mon Sep 17 00:00:00 2001 From: a-masterov <72613290+a-masterov@users.noreply.github.com> Date: Fri, 13 Dec 2024 14:59:04 +0100 Subject: [PATCH 02/89] Fix pg_regress tests on a cloud staging instance (#10134) ## Problem pg_regress tests start failing due to unique ids added to Neon error messages ## Summary of changes Patches updated --- .github/workflows/cloud-regress.yml | 1 + compute/patches/cloud_regress_pg16.patch | 173 ++++++++++++---------- compute/patches/cloud_regress_pg17.patch | 179 ++++++++++++----------- 3 files changed, 190 insertions(+), 163 deletions(-) diff --git a/.github/workflows/cloud-regress.yml b/.github/workflows/cloud-regress.yml index 2fc26baa21..7b9e434ec3 100644 --- a/.github/workflows/cloud-regress.yml +++ b/.github/workflows/cloud-regress.yml @@ -99,6 +99,7 @@ jobs: BENCHMARK_CONNSTR: ${{steps.create-branch.outputs.dsn}} - name: Delete branch + if: always() 
uses: ./.github/actions/neon-branch-delete with: api_key: ${{ secrets.NEON_STAGING_API_KEY }} diff --git a/compute/patches/cloud_regress_pg16.patch b/compute/patches/cloud_regress_pg16.patch index a4b93d0260..3f0bb84ae7 100644 --- a/compute/patches/cloud_regress_pg16.patch +++ b/compute/patches/cloud_regress_pg16.patch @@ -981,7 +981,7 @@ index fc42d418bf..e38f517574 100644 CREATE SCHEMA addr_nsp; SET search_path TO 'addr_nsp'; diff --git a/src/test/regress/expected/password.out b/src/test/regress/expected/password.out -index 8475231735..1afae5395f 100644 +index 8475231735..0653946337 100644 --- a/src/test/regress/expected/password.out +++ b/src/test/regress/expected/password.out @@ -12,11 +12,11 @@ SET password_encryption = 'md5'; -- ok @@ -1006,65 +1006,63 @@ index 8475231735..1afae5395f 100644 -----------------+--------------------------------------------------- - regress_passwd1 | md5783277baca28003b33453252be4dbb34 - regress_passwd2 | md54044304ba511dd062133eb5b4b84a2a3 -+ regress_passwd1 | NEON_MD5_PLACEHOLDER_regress_passwd1 -+ regress_passwd2 | NEON_MD5_PLACEHOLDER_regress_passwd2 ++ regress_passwd1 | NEON_MD5_PLACEHOLDER:regress_passwd1 ++ regress_passwd2 | NEON_MD5_PLACEHOLDER:regress_passwd2 regress_passwd3 | SCRAM-SHA-256$4096:$: - regress_passwd4 | + regress_passwd4 | SCRAM-SHA-256$4096:$: (4 rows) -- Rename a role -@@ -54,24 +54,30 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2; +@@ -54,24 +54,16 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2; -- passwords. SET password_encryption = 'md5'; -- encrypt with MD5 -ALTER ROLE regress_passwd2 PASSWORD 'foo'; +--- already encrypted, use as they are +-ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70'; +-ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo='; +ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER; - -- already encrypted, use as they are - ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70'; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo='; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} SET password_encryption = 'scram-sha-256'; -- create SCRAM secret -ALTER ROLE regress_passwd4 PASSWORD 'foo'; +--- already encrypted with MD5, use as it is +-CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023'; +--- This looks like a valid SCRAM-SHA-256 secret, but it is not +--- so it should be hashed with SCRAM-SHA-256. +-CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234'; +--- These may look like valid MD5 secrets, but they are not, so they +--- should be hashed with SCRAM-SHA-256. 
+--- trailing garbage at the end +-CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz'; +--- invalid length +-CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz'; +ALTER ROLE regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER; - -- already encrypted with MD5, use as it is - CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023'; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - -- This looks like a valid SCRAM-SHA-256 secret, but it is not - -- so it should be hashed with SCRAM-SHA-256. - CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234'; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - -- These may look like valid MD5 secrets, but they are not, so they - -- should be hashed with SCRAM-SHA-256. - -- trailing garbage at the end - CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz'; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - -- invalid length - CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz'; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd5 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd6 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd7 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd8 PASSWORD NEON_PASSWORD_PLACEHOLDER; -- Changing the SCRAM iteration count SET scram_iterations = 1024; CREATE ROLE regress_passwd9 PASSWORD 'alterediterationcount'; -@@ -81,63 +87,67 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+ +@@ -81,11 +73,11 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+ ORDER BY rolname, rolpassword; rolname | rolpassword_masked -----------------+--------------------------------------------------- - regress_passwd1 | md5cd3578025fe2c3d7ed1b9a9b26238b70 - regress_passwd2 | md5dfa155cadd5f4ad57860162f3fab9cdb -+ regress_passwd1 | NEON_MD5_PLACEHOLDER_regress_passwd1 -+ regress_passwd2 | NEON_MD5_PLACEHOLDER_regress_passwd2 ++ regress_passwd1 | NEON_MD5_PLACEHOLDER:regress_passwd1 ++ regress_passwd2 | NEON_MD5_PLACEHOLDER:regress_passwd2 regress_passwd3 | SCRAM-SHA-256$4096:$: regress_passwd4 | SCRAM-SHA-256$4096:$: - regress_passwd5 | md5e73a4b11df52a6068f8b39f90be36023 -- regress_passwd6 | SCRAM-SHA-256$4096:$: -- regress_passwd7 | SCRAM-SHA-256$4096:$: -- regress_passwd8 | SCRAM-SHA-256$4096:$: - regress_passwd9 | SCRAM-SHA-256$1024:$: --(9 rows) -+(5 rows) - ++ regress_passwd5 | SCRAM-SHA-256$4096:$: + regress_passwd6 | SCRAM-SHA-256$4096:$: + regress_passwd7 | SCRAM-SHA-256$4096:$: + regress_passwd8 | SCRAM-SHA-256$4096:$: +@@ -95,23 +87,20 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+ -- An empty password is not allowed, in any form CREATE ROLE regress_passwd_empty PASSWORD ''; NOTICE: empty string is not a valid password, clearing password @@ -1082,56 +1080,37 @@ index 8475231735..1afae5395f 100644 -(1 row) +(0 rows) - -- Test with invalid stored and server keys. - -- - -- The first is valid, to act as a control. 
The others have too long - -- stored/server keys. They will be re-hashed. - CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA='; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} +--- Test with invalid stored and server keys. +--- +--- The first is valid, to act as a control. The others have too long +--- stored/server keys. They will be re-hashed. +-CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; +-CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; +-CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA='; ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd_sha_len0 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd_sha_len1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd_sha_len2 PASSWORD NEON_PASSWORD_PLACEHOLDER; -- Check that the invalid secrets were re-hashed. A re-hashed secret -- should not contain the original salt. 
SELECT rolname, rolpassword not like '%A6xHKoH/494E941doaPOYg==%' as is_rolpassword_rehashed - FROM pg_authid - WHERE rolname LIKE 'regress_passwd_sha_len%' +@@ -120,7 +109,7 @@ SELECT rolname, rolpassword not like '%A6xHKoH/494E941doaPOYg==%' as is_rolpassw ORDER BY rolname; -- rolname | is_rolpassword_rehashed ---------------------------+------------------------- + rolname | is_rolpassword_rehashed + -------------------------+------------------------- - regress_passwd_sha_len0 | f -- regress_passwd_sha_len1 | t -- regress_passwd_sha_len2 | t --(3 rows) -+ rolname | is_rolpassword_rehashed -+---------+------------------------- -+(0 rows) - - DROP ROLE regress_passwd1; - DROP ROLE regress_passwd2; - DROP ROLE regress_passwd3; - DROP ROLE regress_passwd4; - DROP ROLE regress_passwd5; -+ERROR: role "regress_passwd5" does not exist - DROP ROLE regress_passwd6; -+ERROR: role "regress_passwd6" does not exist - DROP ROLE regress_passwd7; -+ERROR: role "regress_passwd7" does not exist ++ regress_passwd_sha_len0 | t + regress_passwd_sha_len1 | t + regress_passwd_sha_len2 | t + (3 rows) +@@ -135,6 +124,7 @@ DROP ROLE regress_passwd7; DROP ROLE regress_passwd8; -+ERROR: role "regress_passwd8" does not exist DROP ROLE regress_passwd9; DROP ROLE regress_passwd_empty; +ERROR: role "regress_passwd_empty" does not exist DROP ROLE regress_passwd_sha_len0; -+ERROR: role "regress_passwd_sha_len0" does not exist DROP ROLE regress_passwd_sha_len1; -+ERROR: role "regress_passwd_sha_len1" does not exist DROP ROLE regress_passwd_sha_len2; -+ERROR: role "regress_passwd_sha_len2" does not exist - -- all entries should have been removed - SELECT rolname, rolpassword - FROM pg_authid diff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out index 5b9dba7b32..cc408dad42 100644 --- a/src/test/regress/expected/privileges.out @@ -3194,7 +3173,7 @@ index 1a6c61f49d..1c31ac6a53 100644 -- Test generic object addressing/identification functions CREATE SCHEMA addr_nsp; diff --git a/src/test/regress/sql/password.sql b/src/test/regress/sql/password.sql -index 53e86b0b6c..f07cf1ec54 100644 +index 53e86b0b6c..0303fdfe96 100644 --- a/src/test/regress/sql/password.sql +++ b/src/test/regress/sql/password.sql @@ -10,11 +10,11 @@ SET password_encryption = 'scram-sha-256'; -- ok @@ -3213,23 +3192,59 @@ index 53e86b0b6c..f07cf1ec54 100644 -- check list of created entries -- -@@ -42,14 +42,14 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2; +@@ -42,26 +42,18 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2; SET password_encryption = 'md5'; -- encrypt with MD5 -ALTER ROLE regress_passwd2 PASSWORD 'foo'; +--- already encrypted, use as they are +-ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70'; +-ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo='; +ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER; - -- already encrypted, use as they are - ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70'; - ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo='; SET password_encryption = 'scram-sha-256'; -- create SCRAM secret -ALTER ROLE regress_passwd4 PASSWORD 'foo'; +--- already encrypted with MD5, use as it is +-CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023'; +ALTER ROLE 
regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER; - -- already encrypted with MD5, use as it is - CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023'; ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd5 PASSWORD NEON_PASSWORD_PLACEHOLDER; +--- This looks like a valid SCRAM-SHA-256 secret, but it is not +--- so it should be hashed with SCRAM-SHA-256. +-CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234'; +--- These may look like valid MD5 secrets, but they are not, so they +--- should be hashed with SCRAM-SHA-256. +--- trailing garbage at the end +-CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz'; +--- invalid length +-CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz'; ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd6 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd7 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd8 PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- Changing the SCRAM iteration count + SET scram_iterations = 1024; +@@ -78,13 +70,10 @@ ALTER ROLE regress_passwd_empty PASSWORD 'md585939a5ce845f1a1b620742e3c659e0a'; + ALTER ROLE regress_passwd_empty PASSWORD 'SCRAM-SHA-256$4096:hpFyHTUsSWcR7O9P$LgZFIt6Oqdo27ZFKbZ2nV+vtnYM995pDh9ca6WSi120=:qVV5NeluNfUPkwm7Vqat25RjSPLkGeoZBQs6wVv+um4='; + SELECT rolpassword FROM pg_authid WHERE rolname='regress_passwd_empty'; + +--- Test with invalid stored and server keys. +--- +--- The first is valid, to act as a control. The others have too long +--- stored/server keys. They will be re-hashed. +-CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; +-CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; +-CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA='; ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd_sha_len0 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd_sha_len1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd_sha_len2 PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- Check that the invalid secrets were re-hashed. A re-hashed secret + -- should not contain the original salt. 
diff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql index 249df17a58..b258e7f26a 100644 --- a/src/test/regress/sql/privileges.sql diff --git a/compute/patches/cloud_regress_pg17.patch b/compute/patches/cloud_regress_pg17.patch index cbe84ef54b..e57447a2c6 100644 --- a/compute/patches/cloud_regress_pg17.patch +++ b/compute/patches/cloud_regress_pg17.patch @@ -1014,10 +1014,10 @@ index fc42d418bf..e38f517574 100644 CREATE SCHEMA addr_nsp; SET search_path TO 'addr_nsp'; diff --git a/src/test/regress/expected/password.out b/src/test/regress/expected/password.out -index 924d6e001d..5966531db6 100644 +index 924d6e001d..7fdda73439 100644 --- a/src/test/regress/expected/password.out +++ b/src/test/regress/expected/password.out -@@ -12,13 +12,13 @@ SET password_encryption = 'md5'; -- ok +@@ -12,13 +12,11 @@ SET password_encryption = 'md5'; -- ok SET password_encryption = 'scram-sha-256'; -- ok -- consistency of password entries SET password_encryption = 'md5'; @@ -1026,9 +1026,7 @@ index 924d6e001d..5966531db6 100644 -CREATE ROLE regress_passwd2; -ALTER ROLE regress_passwd2 PASSWORD 'role_pwd2'; +CREATE ROLE regress_passwd1 PASSWORD NEON_PASSWORD_PLACEHOLDER; -+ALTER ROLE regress_passwd1 PASSWORD NEON_PASSWORD_PLACEHOLDER; +CREATE ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER; -+ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER; SET password_encryption = 'scram-sha-256'; -CREATE ROLE regress_passwd3 PASSWORD 'role_pwd3'; -CREATE ROLE regress_passwd4 PASSWORD NULL; @@ -1037,71 +1035,69 @@ index 924d6e001d..5966531db6 100644 -- check list of created entries -- -- The scram secret will look something like: -@@ -32,10 +32,10 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+ +@@ -32,10 +30,10 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+ ORDER BY rolname, rolpassword; rolname | rolpassword_masked -----------------+--------------------------------------------------- - regress_passwd1 | md5783277baca28003b33453252be4dbb34 - regress_passwd2 | md54044304ba511dd062133eb5b4b84a2a3 -+ regress_passwd1 | NEON_MD5_PLACEHOLDER_regress_passwd1 -+ regress_passwd2 | NEON_MD5_PLACEHOLDER_regress_passwd2 ++ regress_passwd1 | NEON_MD5_PLACEHOLDER:regress_passwd1 ++ regress_passwd2 | NEON_MD5_PLACEHOLDER:regress_passwd2 regress_passwd3 | SCRAM-SHA-256$4096:$: - regress_passwd4 | + regress_passwd4 | SCRAM-SHA-256$4096:$: (4 rows) -- Rename a role -@@ -56,24 +56,30 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2; +@@ -56,24 +54,17 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2; -- passwords. 
SET password_encryption = 'md5'; -- encrypt with MD5 -ALTER ROLE regress_passwd2 PASSWORD 'foo'; +--- already encrypted, use as they are +-ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70'; +-ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo='; +ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER; - -- already encrypted, use as they are - ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70'; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo='; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} SET password_encryption = 'scram-sha-256'; -- create SCRAM secret -ALTER ROLE regress_passwd4 PASSWORD 'foo'; +ALTER ROLE regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER; -- already encrypted with MD5, use as it is - CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023'; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - -- This looks like a valid SCRAM-SHA-256 secret, but it is not - -- so it should be hashed with SCRAM-SHA-256. - CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234'; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - -- These may look like valid MD5 secrets, but they are not, so they - -- should be hashed with SCRAM-SHA-256. - -- trailing garbage at the end - CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz'; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - -- invalid length - CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz'; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} +-CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023'; +--- This looks like a valid SCRAM-SHA-256 secret, but it is not +--- so it should be hashed with SCRAM-SHA-256. +-CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234'; +--- These may look like valid MD5 secrets, but they are not, so they +--- should be hashed with SCRAM-SHA-256. 
+--- trailing garbage at the end +-CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz'; +--- invalid length +-CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz'; ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd5 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd6 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd7 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd8 PASSWORD NEON_PASSWORD_PLACEHOLDER; -- Changing the SCRAM iteration count SET scram_iterations = 1024; CREATE ROLE regress_passwd9 PASSWORD 'alterediterationcount'; -@@ -83,63 +89,67 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+ +@@ -83,11 +74,11 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+ ORDER BY rolname, rolpassword; rolname | rolpassword_masked -----------------+--------------------------------------------------- - regress_passwd1 | md5cd3578025fe2c3d7ed1b9a9b26238b70 - regress_passwd2 | md5dfa155cadd5f4ad57860162f3fab9cdb -+ regress_passwd1 | NEON_MD5_PLACEHOLDER_regress_passwd1 -+ regress_passwd2 | NEON_MD5_PLACEHOLDER_regress_passwd2 ++ regress_passwd1 | NEON_MD5_PLACEHOLDER:regress_passwd1 ++ regress_passwd2 | NEON_MD5_PLACEHOLDER:regress_passwd2 regress_passwd3 | SCRAM-SHA-256$4096:$: regress_passwd4 | SCRAM-SHA-256$4096:$: - regress_passwd5 | md5e73a4b11df52a6068f8b39f90be36023 -- regress_passwd6 | SCRAM-SHA-256$4096:$: -- regress_passwd7 | SCRAM-SHA-256$4096:$: -- regress_passwd8 | SCRAM-SHA-256$4096:$: - regress_passwd9 | SCRAM-SHA-256$1024:$: --(9 rows) -+(5 rows) - ++ regress_passwd5 | SCRAM-SHA-256$4096:$: + regress_passwd6 | SCRAM-SHA-256$4096:$: + regress_passwd7 | SCRAM-SHA-256$4096:$: + regress_passwd8 | SCRAM-SHA-256$4096:$: +@@ -97,23 +88,20 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+ -- An empty password is not allowed, in any form CREATE ROLE regress_passwd_empty PASSWORD ''; NOTICE: empty string is not a valid password, clearing password @@ -1119,56 +1115,37 @@ index 924d6e001d..5966531db6 100644 -(1 row) +(0 rows) - -- Test with invalid stored and server keys. - -- - -- The first is valid, to act as a control. The others have too long - -- stored/server keys. They will be re-hashed. - CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} - CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA='; -+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} +--- Test with invalid stored and server keys. 
+--- +--- The first is valid, to act as a control. The others have too long +--- stored/server keys. They will be re-hashed. +-CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; +-CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; +-CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA='; ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd_sha_len0 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd_sha_len1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd_sha_len2 PASSWORD NEON_PASSWORD_PLACEHOLDER; -- Check that the invalid secrets were re-hashed. A re-hashed secret -- should not contain the original salt. SELECT rolname, rolpassword not like '%A6xHKoH/494E941doaPOYg==%' as is_rolpassword_rehashed - FROM pg_authid - WHERE rolname LIKE 'regress_passwd_sha_len%' +@@ -122,7 +110,7 @@ SELECT rolname, rolpassword not like '%A6xHKoH/494E941doaPOYg==%' as is_rolpassw ORDER BY rolname; -- rolname | is_rolpassword_rehashed ---------------------------+------------------------- + rolname | is_rolpassword_rehashed + -------------------------+------------------------- - regress_passwd_sha_len0 | f -- regress_passwd_sha_len1 | t -- regress_passwd_sha_len2 | t --(3 rows) -+ rolname | is_rolpassword_rehashed -+---------+------------------------- -+(0 rows) - - DROP ROLE regress_passwd1; - DROP ROLE regress_passwd2; - DROP ROLE regress_passwd3; - DROP ROLE regress_passwd4; - DROP ROLE regress_passwd5; -+ERROR: role "regress_passwd5" does not exist - DROP ROLE regress_passwd6; -+ERROR: role "regress_passwd6" does not exist - DROP ROLE regress_passwd7; -+ERROR: role "regress_passwd7" does not exist ++ regress_passwd_sha_len0 | t + regress_passwd_sha_len1 | t + regress_passwd_sha_len2 | t + (3 rows) +@@ -137,6 +125,7 @@ DROP ROLE regress_passwd7; DROP ROLE regress_passwd8; -+ERROR: role "regress_passwd8" does not exist DROP ROLE regress_passwd9; DROP ROLE regress_passwd_empty; +ERROR: role "regress_passwd_empty" does not exist DROP ROLE regress_passwd_sha_len0; -+ERROR: role "regress_passwd_sha_len0" does not exist DROP ROLE regress_passwd_sha_len1; -+ERROR: role "regress_passwd_sha_len1" does not exist DROP ROLE regress_passwd_sha_len2; -+ERROR: role "regress_passwd_sha_len2" does not exist - -- all entries should have been removed - SELECT rolname, rolpassword - FROM pg_authid diff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out index 1296da0d57..f43fffa44c 100644 --- a/src/test/regress/expected/privileges.out @@ -3249,10 +3226,10 @@ index 1a6c61f49d..1c31ac6a53 100644 -- Test generic object addressing/identification functions CREATE SCHEMA addr_nsp; diff --git a/src/test/regress/sql/password.sql b/src/test/regress/sql/password.sql -index bb82aa4aa2..7424c91b10 100644 +index bb82aa4aa2..dd8a05e24d 100644 --- a/src/test/regress/sql/password.sql +++ b/src/test/regress/sql/password.sql -@@ -10,13 +10,13 @@ SET password_encryption = 'scram-sha-256'; -- ok +@@ -10,13 +10,11 @@ 
SET password_encryption = 'scram-sha-256'; -- ok -- consistency of password entries SET password_encryption = 'md5'; @@ -3261,9 +3238,7 @@ index bb82aa4aa2..7424c91b10 100644 -CREATE ROLE regress_passwd2; -ALTER ROLE regress_passwd2 PASSWORD 'role_pwd2'; +CREATE ROLE regress_passwd1 PASSWORD NEON_PASSWORD_PLACEHOLDER; -+ALTER ROLE regress_passwd1 PASSWORD NEON_PASSWORD_PLACEHOLDER; +CREATE ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER; -+ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER; SET password_encryption = 'scram-sha-256'; -CREATE ROLE regress_passwd3 PASSWORD 'role_pwd3'; -CREATE ROLE regress_passwd4 PASSWORD NULL; @@ -3272,23 +3247,59 @@ index bb82aa4aa2..7424c91b10 100644 -- check list of created entries -- -@@ -44,14 +44,14 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2; +@@ -44,26 +42,19 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2; SET password_encryption = 'md5'; -- encrypt with MD5 -ALTER ROLE regress_passwd2 PASSWORD 'foo'; +--- already encrypted, use as they are +-ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70'; +-ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo='; +ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER; - -- already encrypted, use as they are - ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70'; - ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo='; SET password_encryption = 'scram-sha-256'; -- create SCRAM secret -ALTER ROLE regress_passwd4 PASSWORD 'foo'; +ALTER ROLE regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER; -- already encrypted with MD5, use as it is - CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023'; +-CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023'; ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd5 PASSWORD NEON_PASSWORD_PLACEHOLDER; +--- This looks like a valid SCRAM-SHA-256 secret, but it is not +--- so it should be hashed with SCRAM-SHA-256. +-CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234'; +--- These may look like valid MD5 secrets, but they are not, so they +--- should be hashed with SCRAM-SHA-256. +--- trailing garbage at the end +-CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz'; +--- invalid length +-CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz'; ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd6 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd7 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd8 PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- Changing the SCRAM iteration count + SET scram_iterations = 1024; +@@ -80,13 +71,10 @@ ALTER ROLE regress_passwd_empty PASSWORD 'md585939a5ce845f1a1b620742e3c659e0a'; + ALTER ROLE regress_passwd_empty PASSWORD 'SCRAM-SHA-256$4096:hpFyHTUsSWcR7O9P$LgZFIt6Oqdo27ZFKbZ2nV+vtnYM995pDh9ca6WSi120=:qVV5NeluNfUPkwm7Vqat25RjSPLkGeoZBQs6wVv+um4='; + SELECT rolpassword FROM pg_authid WHERE rolname='regress_passwd_empty'; + +--- Test with invalid stored and server keys. +--- +--- The first is valid, to act as a control. The others have too long +--- stored/server keys. They will be re-hashed. 
+-CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; +-CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; +-CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA='; ++-- Neon does not support encrypted passwords, use unencrypted instead ++CREATE ROLE regress_passwd_sha_len0 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd_sha_len1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd_sha_len2 PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- Check that the invalid secrets were re-hashed. A re-hashed secret + -- should not contain the original salt. diff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql index 5880bc018d..27aa952b18 100644 --- a/src/test/regress/sql/privileges.sql From ce8eb089f3d002e5057f860454adeb0993431a19 Mon Sep 17 00:00:00 2001 From: Arseny Sher Date: Fri, 13 Dec 2024 17:06:27 +0300 Subject: [PATCH 03/89] Extract public sk types to safekeeper_api (#10137) ## Problem We want to extract safekeeper http client to separate crate for use in storage controller and neon_local. However, many types used in the API are internal to safekeeper. ## Summary of changes Move them to safekeeper_api crate. No functional changes. ref https://github.com/neondatabase/neon/issues/9011 --- Cargo.lock | 3 + libs/safekeeper_api/Cargo.toml | 5 +- libs/safekeeper_api/src/lib.rs | 17 ++ libs/safekeeper_api/src/models.rs | 170 +++++++++++++++++- safekeeper/src/control_file_upgrade.rs | 3 +- safekeeper/src/debug_dump.rs | 2 +- safekeeper/src/handler.rs | 4 +- safekeeper/src/http/client.rs | 3 +- safekeeper/src/http/routes.rs | 72 ++------ safekeeper/src/json_ctrl.rs | 5 +- safekeeper/src/pull_timeline.rs | 7 +- safekeeper/src/receive_wal.rs | 21 +-- safekeeper/src/recovery.rs | 8 +- safekeeper/src/safekeeper.rs | 19 +- safekeeper/src/send_wal.rs | 89 +-------- safekeeper/src/state.rs | 7 +- safekeeper/src/timeline.rs | 55 ++---- safekeeper/src/timeline_manager.rs | 4 +- safekeeper/src/timelines_global_map.rs | 2 +- safekeeper/src/wal_backup.rs | 3 +- safekeeper/src/wal_backup_partial.rs | 2 +- safekeeper/src/wal_reader_stream.rs | 2 +- safekeeper/src/wal_service.rs | 3 +- .../tests/walproposer_sim/safekeeper.rs | 3 +- 24 files changed, 264 insertions(+), 245 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e2d5e03613..c4f80f63c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5565,7 +5565,10 @@ name = "safekeeper_api" version = "0.1.0" dependencies = [ "const_format", + "postgres_ffi", + "pq_proto", "serde", + "tokio", "utils", ] diff --git a/libs/safekeeper_api/Cargo.toml b/libs/safekeeper_api/Cargo.toml index 14811232d3..4234ec6779 100644 --- a/libs/safekeeper_api/Cargo.toml +++ b/libs/safekeeper_api/Cargo.toml @@ -5,6 +5,9 @@ edition.workspace = true license.workspace = true [dependencies] -serde.workspace = true const_format.workspace = true +serde.workspace = true +postgres_ffi.workspace = true +pq_proto.workspace = true +tokio.workspace = true utils.workspace = true diff --git a/libs/safekeeper_api/src/lib.rs 
b/libs/safekeeper_api/src/lib.rs index 63c2c51188..be6923aca9 100644 --- a/libs/safekeeper_api/src/lib.rs +++ b/libs/safekeeper_api/src/lib.rs @@ -1,10 +1,27 @@ #![deny(unsafe_code)] #![deny(clippy::undocumented_unsafe_blocks)] use const_format::formatcp; +use pq_proto::SystemId; +use serde::{Deserialize, Serialize}; /// Public API types pub mod models; +/// Consensus logical timestamp. Note: it is a part of sk control file. +pub type Term = u64; +pub const INVALID_TERM: Term = 0; + +/// Information about Postgres. Safekeeper gets it once and then verifies all +/// further connections from computes match. Note: it is a part of sk control +/// file. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ServerInfo { + /// Postgres server version + pub pg_version: u32, + pub system_id: SystemId, + pub wal_seg_size: u32, +} + pub const DEFAULT_PG_LISTEN_PORT: u16 = 5454; pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}"); diff --git a/libs/safekeeper_api/src/models.rs b/libs/safekeeper_api/src/models.rs index 28666d197a..3e424a792c 100644 --- a/libs/safekeeper_api/src/models.rs +++ b/libs/safekeeper_api/src/models.rs @@ -1,10 +1,23 @@ +//! Types used in safekeeper http API. Many of them are also reused internally. + +use postgres_ffi::TimestampTz; use serde::{Deserialize, Serialize}; +use std::net::SocketAddr; +use tokio::time::Instant; use utils::{ - id::{NodeId, TenantId, TimelineId}, + id::{NodeId, TenantId, TenantTimelineId, TimelineId}, lsn::Lsn, + pageserver_feedback::PageserverFeedback, }; +use crate::{ServerInfo, Term}; + +#[derive(Debug, Serialize)] +pub struct SafekeeperStatus { + pub id: NodeId, +} + #[derive(Serialize, Deserialize)] pub struct TimelineCreateRequest { pub tenant_id: TenantId, @@ -18,6 +31,161 @@ pub struct TimelineCreateRequest { pub local_start_lsn: Option, } +/// Same as TermLsn, but serializes LSN using display serializer +/// in Postgres format, i.e. 0/FFFFFFFF. Used only for the API response. +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub struct TermSwitchApiEntry { + pub term: Term, + pub lsn: Lsn, +} + +/// Augment AcceptorState with last_log_term for convenience +#[derive(Debug, Serialize, Deserialize)] +pub struct AcceptorStateStatus { + pub term: Term, + pub epoch: Term, // aka last_log_term, old `epoch` name is left for compatibility + pub term_history: Vec, +} + +/// Things safekeeper should know about timeline state on peers. +/// Used as both model and internally. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PeerInfo { + pub sk_id: NodeId, + pub term: Term, + /// Term of the last entry. + pub last_log_term: Term, + /// LSN of the last record. + pub flush_lsn: Lsn, + pub commit_lsn: Lsn, + /// Since which LSN safekeeper has WAL. + pub local_start_lsn: Lsn, + /// When info was received. Serde annotations are not very useful but make + /// the code compile -- we don't rely on this field externally. 
+ #[serde(skip)] + #[serde(default = "Instant::now")] + pub ts: Instant, + pub pg_connstr: String, + pub http_connstr: String, +} + +pub type FullTransactionId = u64; + +/// Hot standby feedback received from replica +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub struct HotStandbyFeedback { + pub ts: TimestampTz, + pub xmin: FullTransactionId, + pub catalog_xmin: FullTransactionId, +} + +pub const INVALID_FULL_TRANSACTION_ID: FullTransactionId = 0; + +impl HotStandbyFeedback { + pub fn empty() -> HotStandbyFeedback { + HotStandbyFeedback { + ts: 0, + xmin: 0, + catalog_xmin: 0, + } + } +} + +/// Standby status update +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub struct StandbyReply { + pub write_lsn: Lsn, // The location of the last WAL byte + 1 received and written to disk in the standby. + pub flush_lsn: Lsn, // The location of the last WAL byte + 1 flushed to disk in the standby. + pub apply_lsn: Lsn, // The location of the last WAL byte + 1 applied in the standby. + pub reply_ts: TimestampTz, // The client's system clock at the time of transmission, as microseconds since midnight on 2000-01-01. + pub reply_requested: bool, +} + +impl StandbyReply { + pub fn empty() -> Self { + StandbyReply { + write_lsn: Lsn::INVALID, + flush_lsn: Lsn::INVALID, + apply_lsn: Lsn::INVALID, + reply_ts: 0, + reply_requested: false, + } + } +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub struct StandbyFeedback { + pub reply: StandbyReply, + pub hs_feedback: HotStandbyFeedback, +} + +impl StandbyFeedback { + pub fn empty() -> Self { + StandbyFeedback { + reply: StandbyReply::empty(), + hs_feedback: HotStandbyFeedback::empty(), + } + } +} + +/// Receiver is either pageserver or regular standby, which have different +/// feedbacks. +/// Used as both model and internally. +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub enum ReplicationFeedback { + Pageserver(PageserverFeedback), + Standby(StandbyFeedback), +} + +/// Uniquely identifies a WAL service connection. Logged in spans for +/// observability. +pub type ConnectionId = u32; + +/// Serialize is used only for json'ing in API response. Also used internally. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct WalSenderState { + pub ttid: TenantTimelineId, + pub addr: SocketAddr, + pub conn_id: ConnectionId, + // postgres application_name + pub appname: Option, + pub feedback: ReplicationFeedback, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct WalReceiverState { + /// None means it is recovery initiated by us (this safekeeper). + pub conn_id: Option, + pub status: WalReceiverStatus, +} + +/// Walreceiver status. Currently only whether it passed voting stage and +/// started receiving the stream, but it is easy to add more if needed. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum WalReceiverStatus { + Voting, + Streaming, +} + +/// Info about timeline on safekeeper ready for reporting. 
+#[derive(Debug, Serialize, Deserialize)] +pub struct TimelineStatus { + pub tenant_id: TenantId, + pub timeline_id: TimelineId, + pub acceptor_state: AcceptorStateStatus, + pub pg_info: ServerInfo, + pub flush_lsn: Lsn, + pub timeline_start_lsn: Lsn, + pub local_start_lsn: Lsn, + pub commit_lsn: Lsn, + pub backup_lsn: Lsn, + pub peer_horizon_lsn: Lsn, + pub remote_consistent_lsn: Lsn, + pub peers: Vec, + pub walsenders: Vec, + pub walreceivers: Vec, +} + fn lsn_invalid() -> Lsn { Lsn::INVALID } diff --git a/safekeeper/src/control_file_upgrade.rs b/safekeeper/src/control_file_upgrade.rs index a4b4670e42..dd152fd4cc 100644 --- a/safekeeper/src/control_file_upgrade.rs +++ b/safekeeper/src/control_file_upgrade.rs @@ -1,11 +1,12 @@ //! Code to deal with safekeeper control file upgrades use crate::{ - safekeeper::{AcceptorState, PgUuid, ServerInfo, Term, TermHistory, TermLsn}, + safekeeper::{AcceptorState, PgUuid, TermHistory, TermLsn}, state::{EvictionState, PersistedPeers, TimelinePersistentState}, wal_backup_partial, }; use anyhow::{bail, Result}; use pq_proto::SystemId; +use safekeeper_api::{ServerInfo, Term}; use serde::{Deserialize, Serialize}; use tracing::*; use utils::{ diff --git a/safekeeper/src/debug_dump.rs b/safekeeper/src/debug_dump.rs index 93011eddec..19362a0992 100644 --- a/safekeeper/src/debug_dump.rs +++ b/safekeeper/src/debug_dump.rs @@ -14,6 +14,7 @@ use camino::Utf8PathBuf; use chrono::{DateTime, Utc}; use postgres_ffi::XLogSegNo; use postgres_ffi::MAX_SEND_SIZE; +use safekeeper_api::models::WalSenderState; use serde::Deserialize; use serde::Serialize; @@ -25,7 +26,6 @@ use utils::id::{TenantId, TimelineId}; use utils::lsn::Lsn; use crate::safekeeper::TermHistory; -use crate::send_wal::WalSenderState; use crate::state::TimelineMemState; use crate::state::TimelinePersistentState; use crate::timeline::get_timeline_dir; diff --git a/safekeeper/src/handler.rs b/safekeeper/src/handler.rs index 2ca6333ba8..bb639bfb32 100644 --- a/safekeeper/src/handler.rs +++ b/safekeeper/src/handler.rs @@ -4,6 +4,8 @@ use anyhow::Context; use pageserver_api::models::ShardParameters; use pageserver_api::shard::{ShardIdentity, ShardStripeSize}; +use safekeeper_api::models::ConnectionId; +use safekeeper_api::Term; use std::future::Future; use std::str::{self, FromStr}; use std::sync::Arc; @@ -16,9 +18,7 @@ use crate::auth::check_permission; use crate::json_ctrl::{handle_json_ctrl, AppendLogicalMessage}; use crate::metrics::{TrafficMetrics, PG_QUERIES_GAUGE}; -use crate::safekeeper::Term; use crate::timeline::TimelineError; -use crate::wal_service::ConnectionId; use crate::{GlobalTimelines, SafeKeeperConf}; use postgres_backend::PostgresBackend; use postgres_backend::QueryError; diff --git a/safekeeper/src/http/client.rs b/safekeeper/src/http/client.rs index a166fc1ab9..669a9c0ce9 100644 --- a/safekeeper/src/http/client.rs +++ b/safekeeper/src/http/client.rs @@ -8,6 +8,7 @@ //! etc. 
use reqwest::{IntoUrl, Method, StatusCode}; +use safekeeper_api::models::TimelineStatus; use std::error::Error as _; use utils::{ http::error::HttpErrorBody, @@ -15,8 +16,6 @@ use utils::{ logging::SecretString, }; -use super::routes::TimelineStatus; - #[derive(Debug, Clone)] pub struct Client { mgmt_api_endpoint: String, diff --git a/safekeeper/src/http/routes.rs b/safekeeper/src/http/routes.rs index 71c36f1d46..9bc1bf3409 100644 --- a/safekeeper/src/http/routes.rs +++ b/safekeeper/src/http/routes.rs @@ -1,5 +1,9 @@ use hyper::{Body, Request, Response, StatusCode}; -use serde::{Deserialize, Serialize}; +use safekeeper_api::models::AcceptorStateStatus; +use safekeeper_api::models::SafekeeperStatus; +use safekeeper_api::models::TermSwitchApiEntry; +use safekeeper_api::models::TimelineStatus; +use safekeeper_api::ServerInfo; use std::collections::HashMap; use std::fmt; use std::io::Write as _; @@ -31,26 +35,17 @@ use utils::{ request::{ensure_no_body, parse_request_param}, RequestExt, RouterBuilder, }, - id::{NodeId, TenantId, TenantTimelineId, TimelineId}, + id::{TenantId, TenantTimelineId, TimelineId}, lsn::Lsn, }; use crate::debug_dump::TimelineDigestRequest; -use crate::receive_wal::WalReceiverState; -use crate::safekeeper::Term; -use crate::safekeeper::{ServerInfo, TermLsn}; -use crate::send_wal::WalSenderState; -use crate::timeline::PeerInfo; +use crate::safekeeper::TermLsn; use crate::timelines_global_map::TimelineDeleteForceResult; use crate::GlobalTimelines; use crate::SafeKeeperConf; use crate::{copy_timeline, debug_dump, patch_control_file, pull_timeline}; -#[derive(Debug, Serialize)] -struct SafekeeperStatus { - id: NodeId, -} - /// Healthcheck handler. async fn status_handler(request: Request) -> Result, ApiError> { check_permission(&request, None)?; @@ -73,50 +68,6 @@ fn get_global_timelines(request: &Request) -> Arc { .clone() } -/// Same as TermLsn, but serializes LSN using display serializer -/// in Postgres format, i.e. 0/FFFFFFFF. Used only for the API response. -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] -pub struct TermSwitchApiEntry { - pub term: Term, - pub lsn: Lsn, -} - -impl From for TermLsn { - fn from(api_val: TermSwitchApiEntry) -> Self { - TermLsn { - term: api_val.term, - lsn: api_val.lsn, - } - } -} - -/// Augment AcceptorState with last_log_term for convenience -#[derive(Debug, Serialize, Deserialize)] -pub struct AcceptorStateStatus { - pub term: Term, - pub epoch: Term, // aka last_log_term - pub term_history: Vec, -} - -/// Info about timeline on safekeeper ready for reporting. -#[derive(Debug, Serialize, Deserialize)] -pub struct TimelineStatus { - pub tenant_id: TenantId, - pub timeline_id: TimelineId, - pub acceptor_state: AcceptorStateStatus, - pub pg_info: ServerInfo, - pub flush_lsn: Lsn, - pub timeline_start_lsn: Lsn, - pub local_start_lsn: Lsn, - pub commit_lsn: Lsn, - pub backup_lsn: Lsn, - pub peer_horizon_lsn: Lsn, - pub remote_consistent_lsn: Lsn, - pub peers: Vec, - pub walsenders: Vec, - pub walreceivers: Vec, -} - fn check_permission(request: &Request, tenant_id: Option) -> Result<(), ApiError> { check_permission_with(request, |claims| { crate::auth::check_permission(claims, tenant_id) @@ -187,6 +138,15 @@ async fn timeline_list_handler(request: Request) -> Result, json_response(StatusCode::OK, res) } +impl From for TermLsn { + fn from(api_val: TermSwitchApiEntry) -> Self { + TermLsn { + term: api_val.term, + lsn: api_val.lsn, + } + } +} + /// Report info about timeline. 
async fn timeline_status_handler(request: Request) -> Result, ApiError> { let ttid = TenantTimelineId::new( diff --git a/safekeeper/src/json_ctrl.rs b/safekeeper/src/json_ctrl.rs index dc4ad3706e..256e350ceb 100644 --- a/safekeeper/src/json_ctrl.rs +++ b/safekeeper/src/json_ctrl.rs @@ -8,16 +8,17 @@ use anyhow::Context; use postgres_backend::QueryError; +use safekeeper_api::{ServerInfo, Term}; use serde::{Deserialize, Serialize}; use tokio::io::{AsyncRead, AsyncWrite}; use tracing::*; use crate::handler::SafekeeperPostgresHandler; -use crate::safekeeper::{AcceptorProposerMessage, AppendResponse, ServerInfo}; +use crate::safekeeper::{AcceptorProposerMessage, AppendResponse}; use crate::safekeeper::{ AppendRequest, AppendRequestHeader, ProposerAcceptorMessage, ProposerElected, }; -use crate::safekeeper::{Term, TermHistory, TermLsn}; +use crate::safekeeper::{TermHistory, TermLsn}; use crate::state::TimelinePersistentState; use crate::timeline::WalResidentTimeline; use postgres_backend::PostgresBackend; diff --git a/safekeeper/src/pull_timeline.rs b/safekeeper/src/pull_timeline.rs index f58a9dca1d..00777273cb 100644 --- a/safekeeper/src/pull_timeline.rs +++ b/safekeeper/src/pull_timeline.rs @@ -4,6 +4,7 @@ use camino::Utf8PathBuf; use chrono::{DateTime, Utc}; use futures::{SinkExt, StreamExt, TryStreamExt}; use postgres_ffi::{XLogFileName, XLogSegNo, PG_TLI}; +use safekeeper_api::{models::TimelineStatus, Term}; use serde::{Deserialize, Serialize}; use std::{ cmp::min, @@ -21,11 +22,7 @@ use tracing::{error, info, instrument}; use crate::{ control_file::CONTROL_FILE_NAME, debug_dump, - http::{ - client::{self, Client}, - routes::TimelineStatus, - }, - safekeeper::Term, + http::client::{self, Client}, state::{EvictionState, TimelinePersistentState}, timeline::{Timeline, WalResidentTimeline}, timelines_global_map::{create_temp_timeline_dir, validate_temp_timeline}, diff --git a/safekeeper/src/receive_wal.rs b/safekeeper/src/receive_wal.rs index 2a49890d61..08371177cd 100644 --- a/safekeeper/src/receive_wal.rs +++ b/safekeeper/src/receive_wal.rs @@ -9,9 +9,7 @@ use crate::metrics::{ }; use crate::safekeeper::AcceptorProposerMessage; use crate::safekeeper::ProposerAcceptorMessage; -use crate::safekeeper::ServerInfo; use crate::timeline::WalResidentTimeline; -use crate::wal_service::ConnectionId; use crate::GlobalTimelines; use anyhow::{anyhow, Context}; use bytes::BytesMut; @@ -23,8 +21,8 @@ use postgres_backend::PostgresBackend; use postgres_backend::PostgresBackendReader; use postgres_backend::QueryError; use pq_proto::BeMessage; -use serde::Deserialize; -use serde::Serialize; +use safekeeper_api::models::{ConnectionId, WalReceiverState, WalReceiverStatus}; +use safekeeper_api::ServerInfo; use std::future; use std::net::SocketAddr; use std::sync::Arc; @@ -171,21 +169,6 @@ impl WalReceiversShared { } } -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct WalReceiverState { - /// None means it is recovery initiated by us (this safekeeper). - pub conn_id: Option, - pub status: WalReceiverStatus, -} - -/// Walreceiver status. Currently only whether it passed voting stage and -/// started receiving the stream, but it is easy to add more if needed. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum WalReceiverStatus { - Voting, - Streaming, -} - /// Scope guard to access slot in WalReceivers registry and unregister from /// it in Drop. 
pub struct WalReceiverGuard { diff --git a/safekeeper/src/recovery.rs b/safekeeper/src/recovery.rs index 7b87166aa0..61647c16b0 100644 --- a/safekeeper/src/recovery.rs +++ b/safekeeper/src/recovery.rs @@ -7,6 +7,8 @@ use std::{fmt, pin::pin}; use anyhow::{bail, Context}; use futures::StreamExt; use postgres_protocol::message::backend::ReplicationMessage; +use safekeeper_api::models::{PeerInfo, TimelineStatus}; +use safekeeper_api::Term; use tokio::sync::mpsc::{channel, Receiver, Sender}; use tokio::time::timeout; use tokio::{ @@ -24,13 +26,11 @@ use crate::receive_wal::{WalAcceptor, REPLY_QUEUE_SIZE}; use crate::safekeeper::{AppendRequest, AppendRequestHeader}; use crate::timeline::WalResidentTimeline; use crate::{ - http::routes::TimelineStatus, receive_wal::MSG_QUEUE_SIZE, safekeeper::{ - AcceptorProposerMessage, ProposerAcceptorMessage, ProposerElected, Term, TermHistory, - TermLsn, VoteRequest, + AcceptorProposerMessage, ProposerAcceptorMessage, ProposerElected, TermHistory, TermLsn, + VoteRequest, }, - timeline::PeerInfo, SafeKeeperConf, }; diff --git a/safekeeper/src/safekeeper.rs b/safekeeper/src/safekeeper.rs index 6eb69f0b7c..ccd7940c72 100644 --- a/safekeeper/src/safekeeper.rs +++ b/safekeeper/src/safekeeper.rs @@ -5,6 +5,9 @@ use byteorder::{LittleEndian, ReadBytesExt}; use bytes::{Buf, BufMut, Bytes, BytesMut}; use postgres_ffi::{TimeLineID, MAX_SEND_SIZE}; +use safekeeper_api::models::HotStandbyFeedback; +use safekeeper_api::Term; +use safekeeper_api::INVALID_TERM; use serde::{Deserialize, Serialize}; use std::cmp::max; use std::cmp::min; @@ -16,7 +19,6 @@ use tracing::*; use crate::control_file; use crate::metrics::MISC_OPERATION_SECONDS; -use crate::send_wal::HotStandbyFeedback; use crate::state::TimelineState; use crate::wal_storage; @@ -31,10 +33,6 @@ use utils::{ const SK_PROTOCOL_VERSION: u32 = 2; pub const UNKNOWN_SERVER_VERSION: u32 = 0; -/// Consensus logical timestamp. -pub type Term = u64; -pub const INVALID_TERM: Term = 0; - #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] pub struct TermLsn { pub term: Term, @@ -198,16 +196,6 @@ impl AcceptorState { } } -/// Information about Postgres. Safekeeper gets it once and then verifies -/// all further connections from computes match. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct ServerInfo { - /// Postgres server version - pub pg_version: u32, - pub system_id: SystemId, - pub wal_seg_size: u32, -} - #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct PersistedPeerInfo { /// LSN up to which safekeeper offloaded WAL to s3. 
@@ -1041,6 +1029,7 @@ where mod tests { use futures::future::BoxFuture; use postgres_ffi::{XLogSegNo, WAL_SEGMENT_SIZE}; + use safekeeper_api::ServerInfo; use super::*; use crate::state::{EvictionState, PersistedPeers, TimelinePersistentState}; diff --git a/safekeeper/src/send_wal.rs b/safekeeper/src/send_wal.rs index 0887cf7264..8463221998 100644 --- a/safekeeper/src/send_wal.rs +++ b/safekeeper/src/send_wal.rs @@ -4,11 +4,10 @@ use crate::handler::SafekeeperPostgresHandler; use crate::metrics::RECEIVED_PS_FEEDBACKS; use crate::receive_wal::WalReceivers; -use crate::safekeeper::{Term, TermLsn}; +use crate::safekeeper::TermLsn; use crate::send_interpreted_wal::InterpretedWalSender; use crate::timeline::WalResidentTimeline; use crate::wal_reader_stream::WalReaderStreamBuilder; -use crate::wal_service::ConnectionId; use crate::wal_storage::WalReader; use anyhow::{bail, Context as AnyhowContext}; use bytes::Bytes; @@ -19,7 +18,11 @@ use postgres_backend::{CopyStreamHandlerEnd, PostgresBackendReader, QueryError}; use postgres_ffi::get_current_timestamp; use postgres_ffi::{TimestampTz, MAX_SEND_SIZE}; use pq_proto::{BeMessage, WalSndKeepAlive, XLogDataBody}; -use serde::{Deserialize, Serialize}; +use safekeeper_api::models::{ + ConnectionId, HotStandbyFeedback, ReplicationFeedback, StandbyFeedback, StandbyReply, + WalSenderState, INVALID_FULL_TRANSACTION_ID, +}; +use safekeeper_api::Term; use tokio::io::{AsyncRead, AsyncWrite}; use utils::failpoint_support; use utils::id::TenantTimelineId; @@ -28,7 +31,6 @@ use utils::postgres_client::PostgresClientProtocol; use std::cmp::{max, min}; use std::net::SocketAddr; -use std::str; use std::sync::Arc; use std::time::Duration; use tokio::sync::watch::Receiver; @@ -42,65 +44,6 @@ const STANDBY_STATUS_UPDATE_TAG_BYTE: u8 = b'r'; // neon extension of replication protocol const NEON_STATUS_UPDATE_TAG_BYTE: u8 = b'z'; -type FullTransactionId = u64; - -/// Hot standby feedback received from replica -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] -pub struct HotStandbyFeedback { - pub ts: TimestampTz, - pub xmin: FullTransactionId, - pub catalog_xmin: FullTransactionId, -} - -const INVALID_FULL_TRANSACTION_ID: FullTransactionId = 0; - -impl HotStandbyFeedback { - pub fn empty() -> HotStandbyFeedback { - HotStandbyFeedback { - ts: 0, - xmin: 0, - catalog_xmin: 0, - } - } -} - -/// Standby status update -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] -pub struct StandbyReply { - pub write_lsn: Lsn, // The location of the last WAL byte + 1 received and written to disk in the standby. - pub flush_lsn: Lsn, // The location of the last WAL byte + 1 flushed to disk in the standby. - pub apply_lsn: Lsn, // The location of the last WAL byte + 1 applied in the standby. - pub reply_ts: TimestampTz, // The client's system clock at the time of transmission, as microseconds since midnight on 2000-01-01. - pub reply_requested: bool, -} - -impl StandbyReply { - fn empty() -> Self { - StandbyReply { - write_lsn: Lsn::INVALID, - flush_lsn: Lsn::INVALID, - apply_lsn: Lsn::INVALID, - reply_ts: 0, - reply_requested: false, - } - } -} - -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] -pub struct StandbyFeedback { - pub reply: StandbyReply, - pub hs_feedback: HotStandbyFeedback, -} - -impl StandbyFeedback { - pub fn empty() -> Self { - StandbyFeedback { - reply: StandbyReply::empty(), - hs_feedback: HotStandbyFeedback::empty(), - } - } -} - /// WalSenders registry. Timeline holds it (wrapped in Arc). 
pub struct WalSenders { mutex: Mutex, @@ -341,25 +284,6 @@ impl WalSendersShared { } } -// Serialized is used only for pretty printing in json. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct WalSenderState { - ttid: TenantTimelineId, - addr: SocketAddr, - conn_id: ConnectionId, - // postgres application_name - appname: Option, - feedback: ReplicationFeedback, -} - -// Receiver is either pageserver or regular standby, which have different -// feedbacks. -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] -enum ReplicationFeedback { - Pageserver(PageserverFeedback), - Standby(StandbyFeedback), -} - // id of the occupied slot in WalSenders to access it (and save in the // WalSenderGuard). We could give Arc directly to the slot, but there is not // much sense in that as values aggregation which is performed on each feedback @@ -888,6 +812,7 @@ impl ReplyReader { #[cfg(test)] mod tests { + use safekeeper_api::models::FullTransactionId; use utils::id::{TenantId, TimelineId}; use super::*; diff --git a/safekeeper/src/state.rs b/safekeeper/src/state.rs index 941b7e67d0..c6ae6c1d2b 100644 --- a/safekeeper/src/state.rs +++ b/safekeeper/src/state.rs @@ -5,7 +5,7 @@ use std::{cmp::max, ops::Deref}; use anyhow::{bail, Result}; use postgres_ffi::WAL_SEGMENT_SIZE; -use safekeeper_api::models::TimelineTermBumpResponse; +use safekeeper_api::{models::TimelineTermBumpResponse, ServerInfo, Term}; use serde::{Deserialize, Serialize}; use utils::{ id::{NodeId, TenantId, TenantTimelineId, TimelineId}, @@ -14,10 +14,7 @@ use utils::{ use crate::{ control_file, - safekeeper::{ - AcceptorState, PersistedPeerInfo, PgUuid, ServerInfo, Term, TermHistory, - UNKNOWN_SERVER_VERSION, - }, + safekeeper::{AcceptorState, PersistedPeerInfo, PgUuid, TermHistory, UNKNOWN_SERVER_VERSION}, timeline::TimelineError, wal_backup_partial::{self}, }; diff --git a/safekeeper/src/timeline.rs b/safekeeper/src/timeline.rs index 94d6ef1061..36860a0da2 100644 --- a/safekeeper/src/timeline.rs +++ b/safekeeper/src/timeline.rs @@ -4,8 +4,8 @@ use anyhow::{anyhow, bail, Result}; use camino::{Utf8Path, Utf8PathBuf}; use remote_storage::RemotePath; -use safekeeper_api::models::TimelineTermBumpResponse; -use serde::{Deserialize, Serialize}; +use safekeeper_api::models::{PeerInfo, TimelineTermBumpResponse}; +use safekeeper_api::Term; use tokio::fs::{self}; use tokio_util::sync::CancellationToken; use utils::id::TenantId; @@ -31,9 +31,7 @@ use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId; use crate::control_file; use crate::rate_limit::RateLimiter; use crate::receive_wal::WalReceivers; -use crate::safekeeper::{ - AcceptorProposerMessage, ProposerAcceptorMessage, SafeKeeper, Term, TermLsn, -}; +use crate::safekeeper::{AcceptorProposerMessage, ProposerAcceptorMessage, SafeKeeper, TermLsn}; use crate::send_wal::WalSenders; use crate::state::{EvictionState, TimelineMemState, TimelinePersistentState, TimelineState}; use crate::timeline_guard::ResidenceGuard; @@ -47,40 +45,17 @@ use crate::wal_storage::{Storage as wal_storage_iface, WalReader}; use crate::SafeKeeperConf; use crate::{debug_dump, timeline_manager, wal_storage}; -/// Things safekeeper should know about timeline state on peers. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PeerInfo { - pub sk_id: NodeId, - pub term: Term, - /// Term of the last entry. - pub last_log_term: Term, - /// LSN of the last record. - pub flush_lsn: Lsn, - pub commit_lsn: Lsn, - /// Since which LSN safekeeper has WAL. 
- pub local_start_lsn: Lsn, - /// When info was received. Serde annotations are not very useful but make - /// the code compile -- we don't rely on this field externally. - #[serde(skip)] - #[serde(default = "Instant::now")] - ts: Instant, - pub pg_connstr: String, - pub http_connstr: String, -} - -impl PeerInfo { - fn from_sk_info(sk_info: &SafekeeperTimelineInfo, ts: Instant) -> PeerInfo { - PeerInfo { - sk_id: NodeId(sk_info.safekeeper_id), - term: sk_info.term, - last_log_term: sk_info.last_log_term, - flush_lsn: Lsn(sk_info.flush_lsn), - commit_lsn: Lsn(sk_info.commit_lsn), - local_start_lsn: Lsn(sk_info.local_start_lsn), - pg_connstr: sk_info.safekeeper_connstr.clone(), - http_connstr: sk_info.http_connstr.clone(), - ts, - } +fn peer_info_from_sk_info(sk_info: &SafekeeperTimelineInfo, ts: Instant) -> PeerInfo { + PeerInfo { + sk_id: NodeId(sk_info.safekeeper_id), + term: sk_info.term, + last_log_term: sk_info.last_log_term, + flush_lsn: Lsn(sk_info.flush_lsn), + commit_lsn: Lsn(sk_info.commit_lsn), + local_start_lsn: Lsn(sk_info.local_start_lsn), + pg_connstr: sk_info.safekeeper_connstr.clone(), + http_connstr: sk_info.http_connstr.clone(), + ts, } } @@ -697,7 +672,7 @@ impl Timeline { { let mut shared_state = self.write_shared_state().await; shared_state.sk.record_safekeeper_info(&sk_info).await?; - let peer_info = PeerInfo::from_sk_info(&sk_info, Instant::now()); + let peer_info = peer_info_from_sk_info(&sk_info, Instant::now()); shared_state.peers_info.upsert(&peer_info); } Ok(()) diff --git a/safekeeper/src/timeline_manager.rs b/safekeeper/src/timeline_manager.rs index c02fb904cf..a33994dcab 100644 --- a/safekeeper/src/timeline_manager.rs +++ b/safekeeper/src/timeline_manager.rs @@ -14,6 +14,7 @@ use std::{ use futures::channel::oneshot; use postgres_ffi::XLogSegNo; +use safekeeper_api::{models::PeerInfo, Term}; use serde::{Deserialize, Serialize}; use tokio::{ task::{JoinError, JoinHandle}, @@ -32,10 +33,9 @@ use crate::{ rate_limit::{rand_duration, RateLimiter}, recovery::recovery_main, remove_wal::calc_horizon_lsn, - safekeeper::Term, send_wal::WalSenders, state::TimelineState, - timeline::{ManagerTimeline, PeerInfo, ReadGuardSharedState, StateSK, WalResidentTimeline}, + timeline::{ManagerTimeline, ReadGuardSharedState, StateSK, WalResidentTimeline}, timeline_guard::{AccessService, GuardId, ResidenceGuard}, timelines_set::{TimelineSetGuard, TimelinesSet}, wal_backup::{self, WalBackupTaskHandle}, diff --git a/safekeeper/src/timelines_global_map.rs b/safekeeper/src/timelines_global_map.rs index e1241ceb9b..ad29c9f66c 100644 --- a/safekeeper/src/timelines_global_map.rs +++ b/safekeeper/src/timelines_global_map.rs @@ -4,7 +4,6 @@ use crate::defaults::DEFAULT_EVICTION_CONCURRENCY; use crate::rate_limit::RateLimiter; -use crate::safekeeper::ServerInfo; use crate::state::TimelinePersistentState; use crate::timeline::{get_tenant_dir, get_timeline_dir, Timeline, TimelineError}; use crate::timelines_set::TimelinesSet; @@ -13,6 +12,7 @@ use crate::{control_file, wal_storage, SafeKeeperConf}; use anyhow::{bail, Context, Result}; use camino::Utf8PathBuf; use camino_tempfile::Utf8TempDir; +use safekeeper_api::ServerInfo; use serde::Serialize; use std::collections::HashMap; use std::str::FromStr; diff --git a/safekeeper/src/wal_backup.rs b/safekeeper/src/wal_backup.rs index 34b5dbeaa1..8517fa0344 100644 --- a/safekeeper/src/wal_backup.rs +++ b/safekeeper/src/wal_backup.rs @@ -3,6 +3,7 @@ use anyhow::{Context, Result}; use camino::{Utf8Path, Utf8PathBuf}; use futures::stream::FuturesOrdered; 
use futures::StreamExt; +use safekeeper_api::models::PeerInfo; use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use utils::backoff; @@ -30,7 +31,7 @@ use tracing::*; use utils::{id::TenantTimelineId, lsn::Lsn}; use crate::metrics::{BACKED_UP_SEGMENTS, BACKUP_ERRORS, WAL_BACKUP_TASKS}; -use crate::timeline::{PeerInfo, WalResidentTimeline}; +use crate::timeline::WalResidentTimeline; use crate::timeline_manager::{Manager, StateSnapshot}; use crate::{SafeKeeperConf, WAL_BACKUP_RUNTIME}; diff --git a/safekeeper/src/wal_backup_partial.rs b/safekeeper/src/wal_backup_partial.rs index bddfca50e4..4e5b34a9bf 100644 --- a/safekeeper/src/wal_backup_partial.rs +++ b/safekeeper/src/wal_backup_partial.rs @@ -22,6 +22,7 @@ use camino::Utf8PathBuf; use postgres_ffi::{XLogFileName, XLogSegNo, PG_TLI}; use remote_storage::RemotePath; +use safekeeper_api::Term; use serde::{Deserialize, Serialize}; use tokio_util::sync::CancellationToken; @@ -31,7 +32,6 @@ use utils::{id::NodeId, lsn::Lsn}; use crate::{ metrics::{MISC_OPERATION_SECONDS, PARTIAL_BACKUP_UPLOADED_BYTES, PARTIAL_BACKUP_UPLOADS}, rate_limit::{rand_duration, RateLimiter}, - safekeeper::Term, timeline::WalResidentTimeline, timeline_manager::StateSnapshot, wal_backup::{self}, diff --git a/safekeeper/src/wal_reader_stream.rs b/safekeeper/src/wal_reader_stream.rs index f8c0c502cd..aea628c208 100644 --- a/safekeeper/src/wal_reader_stream.rs +++ b/safekeeper/src/wal_reader_stream.rs @@ -4,12 +4,12 @@ use async_stream::try_stream; use bytes::Bytes; use futures::Stream; use postgres_backend::CopyStreamHandlerEnd; +use safekeeper_api::Term; use std::time::Duration; use tokio::time::timeout; use utils::lsn::Lsn; use crate::{ - safekeeper::Term, send_wal::{EndWatch, WalSenderGuard}, timeline::WalResidentTimeline, }; diff --git a/safekeeper/src/wal_service.rs b/safekeeper/src/wal_service.rs index 1ff83918a7..1ebcb060e7 100644 --- a/safekeeper/src/wal_service.rs +++ b/safekeeper/src/wal_service.rs @@ -4,6 +4,7 @@ //! use anyhow::{Context, Result}; use postgres_backend::QueryError; +use safekeeper_api::models::ConnectionId; use std::sync::Arc; use std::time::Duration; use tokio::net::TcpStream; @@ -114,8 +115,6 @@ async fn handle_socket( .await } -/// Unique WAL service connection ids are logged in spans for observability. 
-pub type ConnectionId = u32; pub type ConnectionCount = u32; pub fn issue_connection_id(count: &mut ConnectionCount) -> ConnectionId { diff --git a/safekeeper/tests/walproposer_sim/safekeeper.rs b/safekeeper/tests/walproposer_sim/safekeeper.rs index 12aa025771..efcdd89e7d 100644 --- a/safekeeper/tests/walproposer_sim/safekeeper.rs +++ b/safekeeper/tests/walproposer_sim/safekeeper.rs @@ -15,12 +15,13 @@ use desim::{ }; use http::Uri; use safekeeper::{ - safekeeper::{ProposerAcceptorMessage, SafeKeeper, ServerInfo, UNKNOWN_SERVER_VERSION}, + safekeeper::{ProposerAcceptorMessage, SafeKeeper, UNKNOWN_SERVER_VERSION}, state::{TimelinePersistentState, TimelineState}, timeline::TimelineError, wal_storage::Storage, SafeKeeperConf, }; +use safekeeper_api::ServerInfo; use tracing::{debug, info_span, warn}; use utils::{ id::{NodeId, TenantId, TenantTimelineId, TimelineId}, From 2c910628288a29435f102471c6927d3cd24266ba Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Fri, 13 Dec 2024 14:52:54 +0000 Subject: [PATCH 04/89] test_prefetch: reduce timeout to default 5m from 10m (#10105) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem `test_prefetch` is flaky (https://github.com/neondatabase/neon/issues/9961), but if it passes, the run time is less than 30 seconds — we don't need an extended timeout for it. ## Summary of changes - Remove extended test timeout for `test_prefetch` --- test_runner/regress/test_prefetch_buffer_resize.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test_runner/regress/test_prefetch_buffer_resize.py b/test_runner/regress/test_prefetch_buffer_resize.py index 7676b78b0e..99fe80e621 100644 --- a/test_runner/regress/test_prefetch_buffer_resize.py +++ b/test_runner/regress/test_prefetch_buffer_resize.py @@ -7,7 +7,6 @@ from fixtures.neon_fixtures import NeonEnvBuilder @pytest.mark.parametrize("shard_count", [None, 4]) -@pytest.mark.timeout(600) def test_prefetch(neon_env_builder: NeonEnvBuilder, shard_count: int | None): if shard_count is not None: neon_env_builder.num_pageservers = shard_count From fcff7528517b47f79a55334c22dc6dc89c113be1 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Fri, 13 Dec 2024 17:28:21 +0100 Subject: [PATCH 05/89] fix(test_timeline_archival_chaos): flakiness caused by orphan layers (#10083) The test was failing with the scary but generic message `Remote storage metadata corrupted`. The underlying scrubber error is `Orphan layer detected: ...`. The test kills pageserver at random points, hence it's expected that we leak layers if we're killed in the window after layer upload but before it's referenced from index part. Refer to generation numbers RFC for details. 
Refs: - fixes https://github.com/neondatabase/neon/issues/9988 - root-cause analysis https://github.com/neondatabase/neon/issues/9988#issuecomment-2520673167 --- test_runner/regress/test_timeline_archive.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test_runner/regress/test_timeline_archive.py b/test_runner/regress/test_timeline_archive.py index e808dd1396..addf702893 100644 --- a/test_runner/regress/test_timeline_archive.py +++ b/test_runner/regress/test_timeline_archive.py @@ -435,6 +435,14 @@ def test_timeline_archival_chaos(neon_env_builder: NeonEnvBuilder): ] ) + env.storage_scrubber.allowed_errors.extend( + [ + # Unclcean shutdowns of pageserver can legitimately result in orphan layers + # (https://github.com/neondatabase/neon/issues/9988#issuecomment-2520558211) + f".*Orphan layer detected: tenants/{tenant_id}/.*" + ] + ) + class TimelineState: def __init__(self): self.timeline_id = TimelineId.generate() From eeabecd89f89a24fa8ee642c916efd97b4e8fa06 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Fri, 13 Dec 2024 19:40:26 +0200 Subject: [PATCH 06/89] Correctly update LFC used_pages in case of LFC resize (#10128) ## Problem LFC used_pages statistic is not updated in case of LFC resize (shrinking `neon.file_cache_size_limit`) ## Summary of changes Update `lfc_ctl->used_pages` in `lfc_change_limit_hook` Co-authored-by: Konstantin Knizhnik --- pgxn/neon/file_cache.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pgxn/neon/file_cache.c b/pgxn/neon/file_cache.c index 70b250d394..f49415be68 100644 --- a/pgxn/neon/file_cache.c +++ b/pgxn/neon/file_cache.c @@ -365,6 +365,10 @@ lfc_change_limit_hook(int newval, void *extra) neon_log(LOG, "Failed to punch hole in file: %m"); #endif /* We remove the old entry, and re-enter a hole to the hash table */ + for (int i = 0; i < BLOCKS_PER_CHUNK; i++) + { + lfc_ctl->used_pages -= (victim->bitmap[i >> 5] >> (i & 31)) & 1; + } hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL); memset(&holetag, 0, sizeof(holetag)); From 07d1db54b3f0b9c113aa28fcdb85bacf15979cd9 Mon Sep 17 00:00:00 2001 From: Tristan Partin Date: Fri, 13 Dec 2024 12:10:42 -0600 Subject: [PATCH 07/89] Improve comments and log messages in the logical replication monitor (#9974) Improved comments will help others when they read the code, and the log messages will help others understand why the logical replication monitor works the way it does. Signed-off-by: Tristan Partin --- pgxn/neon/logical_replication_monitor.c | 53 ++++++++++++++----------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/pgxn/neon/logical_replication_monitor.c b/pgxn/neon/logical_replication_monitor.c index 5eee5a1679..b94faafdfa 100644 --- a/pgxn/neon/logical_replication_monitor.c +++ b/pgxn/neon/logical_replication_monitor.c @@ -131,8 +131,8 @@ get_snapshots_cutoff_lsn(void) { cutoff = snapshot_descriptors[logical_replication_max_snap_files - 1].lsn; elog(LOG, - "ls_monitor: dropping logical slots with restart_lsn lower %X/%X, found %zu snapshot files, limit is %d", - LSN_FORMAT_ARGS(cutoff), snapshot_index, logical_replication_max_snap_files); + "ls_monitor: number of snapshot files, %zu, is larger than limit of %d", + snapshot_index, logical_replication_max_snap_files); } /* Is the size of the logical snapshots directory larger than specified? 
@@ -162,8 +162,8 @@ get_snapshots_cutoff_lsn(void) } if (cutoff != original) - elog(LOG, "ls_monitor: dropping logical slots with restart_lsn lower than %X/%X, " SNAPDIR " is larger than %d KB", - LSN_FORMAT_ARGS(cutoff), logical_replication_max_logicalsnapdir_size); + elog(LOG, "ls_monitor: " SNAPDIR " is larger than %d KB", + logical_replication_max_logicalsnapdir_size); } pfree(snapshot_descriptors); @@ -214,9 +214,13 @@ InitLogicalReplicationMonitor(void) } /* - * Unused logical replication slots pins WAL and prevents deletion of snapshots. + * Unused logical replication slots pins WAL and prevent deletion of snapshots. * WAL bloat is guarded by max_slot_wal_keep_size; this bgw removes slots which - * need too many .snap files. + * need too many .snap files. These files are stored as AUX files, which are a + * pageserver mechanism for storing non-relation data. AUX files are shipped in + * in the basebackup which is requested by compute_ctl before Postgres starts. + * The larger the time to retrieve the basebackup, the more likely it is the + * compute will be killed by the control plane due to a timeout. */ void LogicalSlotsMonitorMain(Datum main_arg) @@ -239,10 +243,7 @@ LogicalSlotsMonitorMain(Datum main_arg) ProcessConfigFile(PGC_SIGHUP); } - /* - * If there are too many .snap files, just drop all logical slots to - * prevent aux files bloat. - */ + /* Get the cutoff LSN */ cutoff_lsn = get_snapshots_cutoff_lsn(); if (cutoff_lsn > 0) { @@ -252,31 +253,37 @@ LogicalSlotsMonitorMain(Datum main_arg) ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i]; XLogRecPtr restart_lsn; - /* find the name */ LWLockAcquire(ReplicationSlotControlLock, LW_SHARED); - /* Consider only logical repliction slots */ + + /* Consider only active logical repliction slots */ if (!s->in_use || !SlotIsLogical(s)) { LWLockRelease(ReplicationSlotControlLock); continue; } - /* do we need to drop it? */ + /* + * Retrieve the restart LSN to determine if we need to drop the + * slot + */ SpinLockAcquire(&s->mutex); restart_lsn = s->data.restart_lsn; SpinLockRelease(&s->mutex); + + strlcpy(slot_name, s->data.name.data, sizeof(slot_name)); + LWLockRelease(ReplicationSlotControlLock); + if (restart_lsn >= cutoff_lsn) { - LWLockRelease(ReplicationSlotControlLock); + elog(LOG, "ls_monitor: not dropping replication slot %s because restart LSN %X/%X is greater than cutoff LSN %X/%X", + slot_name, LSN_FORMAT_ARGS(restart_lsn), LSN_FORMAT_ARGS(cutoff_lsn)); continue; } - strlcpy(slot_name, s->data.name.data, NAMEDATALEN); - elog(LOG, "ls_monitor: dropping slot %s with restart_lsn %X/%X below horizon %X/%X", + elog(LOG, "ls_monitor: dropping replication slot %s because restart LSN %X/%X lower than cutoff LSN %X/%X", slot_name, LSN_FORMAT_ARGS(restart_lsn), LSN_FORMAT_ARGS(cutoff_lsn)); - LWLockRelease(ReplicationSlotControlLock); - /* now try to drop it, killing owner before if any */ + /* now try to drop it, killing owner before, if any */ for (;;) { pid_t active_pid; @@ -288,9 +295,9 @@ LogicalSlotsMonitorMain(Datum main_arg) if (active_pid == 0) { /* - * Slot is releasted, try to drop it. Though of course + * Slot is released, try to drop it. Though of course, * it could have been reacquired, so drop can ERROR - * out. Similarly it could have been dropped in the + * out. Similarly, it could have been dropped in the * meanwhile. 
* * In principle we could remove pg_try/pg_catch, that @@ -300,14 +307,14 @@ LogicalSlotsMonitorMain(Datum main_arg) PG_TRY(); { ReplicationSlotDrop(slot_name, true); - elog(LOG, "ls_monitor: slot %s dropped", slot_name); + elog(LOG, "ls_monitor: replication slot %s dropped", slot_name); } PG_CATCH(); { /* log ERROR and reset elog stack */ EmitErrorReport(); FlushErrorState(); - elog(LOG, "ls_monitor: failed to drop slot %s", slot_name); + elog(LOG, "ls_monitor: failed to drop replication slot %s", slot_name); } PG_END_TRY(); break; @@ -315,7 +322,7 @@ LogicalSlotsMonitorMain(Datum main_arg) else { /* kill the owner and wait for release */ - elog(LOG, "ls_monitor: killing slot %s owner %d", slot_name, active_pid); + elog(LOG, "ls_monitor: killing replication slot %s owner %d", slot_name, active_pid); (void) kill(active_pid, SIGTERM); /* We shouldn't get stuck, but to be safe add timeout. */ ConditionVariableTimedSleep(&s->active_cv, 1000, WAIT_EVENT_REPLICATION_SLOT_DROP); From 7ee5dca752c9e9b7e65752c2561798bdd91ea3f6 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Fri, 13 Dec 2024 13:22:25 -0500 Subject: [PATCH 08/89] fix(pageserver): race between gc-compaction and repartition (#10127) ## Problem close https://github.com/neondatabase/neon/issues/10124 gc-compaction split_gc_jobs is holding the repartition lock for too long time. ## Summary of changes * Ensure split_gc_compaction_jobs drops the repartition lock once it finishes cloning the structures. * Update comments. --------- Signed-off-by: Alex Chi Z --- pageserver/src/tenant/timeline.rs | 5 ++++- pageserver/src/tenant/timeline/compaction.rs | 8 +++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index b5c7079226..75e268a1b9 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -4064,8 +4064,11 @@ impl Timeline { // NB: there are two callers, one is the compaction task, of which there is only one per struct Tenant and hence Timeline. // The other is the initdb optimization in flush_frozen_layer, used by `boostrap_timeline`, which runs before `.activate()` // and hence before the compaction task starts. + // Note that there are a third "caller" that will take the `partitioning` lock. It is `gc_compaction_split_jobs` for + // gc-compaction where it uses the repartition data to determine the split jobs. In the future, it might use its own + // heuristics, but for now, we should allow concurrent access to it and let the caller retry compaction. return Err(CompactionError::Other(anyhow!( - "repartition() called concurrently, this should not happen" + "repartition() called concurrently, this is rare and a retry should be fine" ))); }; let ((dense_partition, sparse_partition), partition_lsn) = &*partitioning_guard; diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 701247194b..5e6290729c 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -1821,10 +1821,12 @@ impl Timeline { let mut compact_jobs = Vec::new(); // For now, we simply use the key partitioning information; we should do a more fine-grained partitioning // by estimating the amount of files read for a compaction job. We should also partition on LSN. 
- let Ok(partition) = self.partitioning.try_lock() else { - bail!("failed to acquire partition lock"); + let ((dense_ks, sparse_ks), _) = { + let Ok(partition) = self.partitioning.try_lock() else { + bail!("failed to acquire partition lock"); + }; + partition.clone() }; - let ((dense_ks, sparse_ks), _) = &*partition; // Truncate the key range to be within user specified compaction range. fn truncate_to( source_start: &Key, From d56fea680ec2f2a741a3383c63a55eadf86ad602 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Fri, 13 Dec 2024 19:56:32 +0000 Subject: [PATCH 09/89] CI: always require aws-oicd-role-arn input to be set (#10145) ## Problem `benchmarking` job fails because `aws-oicd-role-arn` input is not set ## Summary of changes: - Set `aws-oicd-role-arn` for `benchmarking job - Always require `aws-oicd-role-arn` to be set - Rename `aws_oicd_role_arn` to `aws-oicd-role-arn` for consistency --- .../actions/allure-report-generate/action.yml | 14 +++--- .../actions/allure-report-store/action.yml | 14 +++--- .github/actions/download/action.yml | 12 ++--- .../actions/run-python-test-set/action.yml | 25 +++++----- .github/actions/save-coverage-data/action.yml | 4 +- .github/actions/upload/action.yml | 4 +- .../workflows/_benchmarking_preparation.yml | 2 +- .github/workflows/_build-and-test-locally.yml | 4 +- .github/workflows/benchmarking.yml | 48 +++++++++---------- .github/workflows/build_and_test.yml | 13 +++-- .github/workflows/cloud-regress.yml | 5 +- .github/workflows/ingest_benchmark.yml | 16 +++---- .github/workflows/periodic_pagebench.yml | 2 +- .github/workflows/pg-clients.yml | 10 ++-- 14 files changed, 88 insertions(+), 85 deletions(-) diff --git a/.github/actions/allure-report-generate/action.yml b/.github/actions/allure-report-generate/action.yml index d6219c31b4..d07e3e32e8 100644 --- a/.github/actions/allure-report-generate/action.yml +++ b/.github/actions/allure-report-generate/action.yml @@ -7,10 +7,9 @@ inputs: type: boolean required: false default: false - aws_oicd_role_arn: - description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role' - required: false - default: '' + aws-oicd-role-arn: + description: 'OIDC role arn to interract with S3' + required: true outputs: base-url: @@ -84,12 +83,11 @@ runs: ALLURE_VERSION: 2.27.0 ALLURE_ZIP_SHA256: b071858fb2fa542c65d8f152c5c40d26267b2dfb74df1f1608a589ecca38e777 - - name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test - if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }} - uses: aws-actions/configure-aws-credentials@v4 + - uses: aws-actions/configure-aws-credentials@v4 + if: ${{ !cancelled() }} with: aws-region: eu-central-1 - role-to-assume: ${{ inputs.aws_oicd_role_arn }} + role-to-assume: ${{ inputs.aws-oicd-role-arn }} role-duration-seconds: 3600 # 1 hour should be more than enough to upload report # Potentially we could have several running build for the same key (for example, for the main branch), so we use improvised lock for this diff --git a/.github/actions/allure-report-store/action.yml b/.github/actions/allure-report-store/action.yml index 3c83656c89..8548a886cf 100644 --- a/.github/actions/allure-report-store/action.yml +++ b/.github/actions/allure-report-store/action.yml @@ -8,10 +8,9 @@ inputs: unique-key: description: 'string to distinguish different results in the same run' required: true - aws_oicd_role_arn: - description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call 
must acquire OIDC role' - required: false - default: '' + aws-oicd-role-arn: + description: 'OIDC role arn to interract with S3' + required: true runs: using: "composite" @@ -36,12 +35,11 @@ runs: env: REPORT_DIR: ${{ inputs.report-dir }} - - name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test - if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }} - uses: aws-actions/configure-aws-credentials@v4 + - uses: aws-actions/configure-aws-credentials@v4 + if: ${{ !cancelled() }} with: aws-region: eu-central-1 - role-to-assume: ${{ inputs.aws_oicd_role_arn }} + role-to-assume: ${{ inputs.aws-oicd-role-arn }} role-duration-seconds: 3600 # 1 hour should be more than enough to upload report - name: Upload test results diff --git a/.github/actions/download/action.yml b/.github/actions/download/action.yml index d6b1fac9f7..14b2ef8eac 100644 --- a/.github/actions/download/action.yml +++ b/.github/actions/download/action.yml @@ -15,19 +15,17 @@ inputs: prefix: description: "S3 prefix. Default is '${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'" required: false - aws_oicd_role_arn: - description: "the OIDC role arn for aws auth" - required: false - default: "" + aws-oicd-role-arn: + description: 'OIDC role arn to interract with S3' + required: true runs: using: "composite" steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 + - uses: aws-actions/configure-aws-credentials@v4 with: aws-region: eu-central-1 - role-to-assume: ${{ inputs.aws_oicd_role_arn }} + role-to-assume: ${{ inputs.aws-oicd-role-arn }} role-duration-seconds: 3600 - name: Download artifact diff --git a/.github/actions/run-python-test-set/action.yml b/.github/actions/run-python-test-set/action.yml index dd5c890f5b..9a0261d430 100644 --- a/.github/actions/run-python-test-set/action.yml +++ b/.github/actions/run-python-test-set/action.yml @@ -48,10 +48,9 @@ inputs: description: 'benchmark durations JSON' required: false default: '{}' - aws_oicd_role_arn: - description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role' - required: false - default: '' + aws-oicd-role-arn: + description: 'OIDC role arn to interract with S3' + required: true runs: using: "composite" @@ -62,7 +61,7 @@ runs: with: name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact path: /tmp/neon - aws_oicd_role_arn: ${{ inputs.aws_oicd_role_arn }} + aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }} - name: Download Neon binaries for the previous release if: inputs.build_type != 'remote' @@ -71,7 +70,7 @@ runs: name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact path: /tmp/neon-previous prefix: latest - aws_oicd_role_arn: ${{ inputs.aws_oicd_role_arn }} + aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }} - name: Download compatibility snapshot if: inputs.build_type != 'remote' @@ -83,7 +82,7 @@ runs: # The lack of compatibility snapshot (for example, for the new Postgres version) # shouldn't fail the whole job. Only relevant test should fail. 
skip-if-does-not-exist: true - aws_oicd_role_arn: ${{ inputs.aws_oicd_role_arn }} + aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }} - name: Checkout if: inputs.needs_postgres_source == 'true' @@ -221,19 +220,19 @@ runs: # The lack of compatibility snapshot shouldn't fail the job # (for example if we didn't run the test for non build-and-test workflow) skip-if-does-not-exist: true - aws_oicd_role_arn: ${{ inputs.aws_oicd_role_arn }} + aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }} - - name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test - if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }} - uses: aws-actions/configure-aws-credentials@v4 + - uses: aws-actions/configure-aws-credentials@v4 + if: ${{ !cancelled() }} with: aws-region: eu-central-1 - role-to-assume: ${{ inputs.aws_oicd_role_arn }} + role-to-assume: ${{ inputs.aws-oicd-role-arn }} role-duration-seconds: 3600 # 1 hour should be more than enough to upload report + - name: Upload test results if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-store with: report-dir: /tmp/test_output/allure/results unique-key: ${{ inputs.build_type }}-${{ inputs.pg_version }} - aws_oicd_role_arn: ${{ inputs.aws_oicd_role_arn }} + aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }} diff --git a/.github/actions/save-coverage-data/action.yml b/.github/actions/save-coverage-data/action.yml index 9e3a7cba24..1bbea5400f 100644 --- a/.github/actions/save-coverage-data/action.yml +++ b/.github/actions/save-coverage-data/action.yml @@ -14,11 +14,11 @@ runs: name: coverage-data-artifact path: /tmp/coverage skip-if-does-not-exist: true # skip if there's no previous coverage to download - aws_oicd_role_arn: ${{ inputs.aws_oicd_role_arn }} + aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }} - name: Upload coverage data uses: ./.github/actions/upload with: name: coverage-data-artifact path: /tmp/coverage - aws_oicd_role_arn: ${{ inputs.aws_oicd_role_arn }} + aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }} diff --git a/.github/actions/upload/action.yml b/.github/actions/upload/action.yml index 6616d08899..ac5579ccea 100644 --- a/.github/actions/upload/action.yml +++ b/.github/actions/upload/action.yml @@ -14,7 +14,7 @@ inputs: prefix: description: "S3 prefix. 
Default is '${GITHUB_SHA}/${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'" required: false - aws_oicd_role_arn: + aws-oicd-role-arn: description: "the OIDC role arn for aws auth" required: false default: "" @@ -61,7 +61,7 @@ runs: uses: aws-actions/configure-aws-credentials@v4 with: aws-region: eu-central-1 - role-to-assume: ${{ inputs.aws_oicd_role_arn }} + role-to-assume: ${{ inputs.aws-oicd-role-arn }} role-duration-seconds: 3600 - name: Upload artifact diff --git a/.github/workflows/_benchmarking_preparation.yml b/.github/workflows/_benchmarking_preparation.yml index 371d815fc8..fd328586b3 100644 --- a/.github/workflows/_benchmarking_preparation.yml +++ b/.github/workflows/_benchmarking_preparation.yml @@ -70,7 +70,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} # we create a table that has one row for each database that we want to restore with the status whether the restore is done - name: Create benchmark_restore_status table if it does not exist diff --git a/.github/workflows/_build-and-test-locally.yml b/.github/workflows/_build-and-test-locally.yml index 456399f3c3..4263bacce8 100644 --- a/.github/workflows/_build-and-test-locally.yml +++ b/.github/workflows/_build-and-test-locally.yml @@ -264,7 +264,7 @@ jobs: with: name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-artifact path: /tmp/neon - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} # XXX: keep this after the binaries.list is formed, so the coverage can properly work later - name: Merge and upload coverage data @@ -308,7 +308,7 @@ jobs: real_s3_region: eu-central-1 rerun_failed: true pg_version: ${{ matrix.pg_version }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }} CHECK_ONDISK_DATA_COMPATIBILITY: nonempty diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 2d37be8837..bbdcf5ef49 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -105,7 +105,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Create Neon Project id: create-neon-project @@ -123,7 +123,7 @@ jobs: run_in_parallel: false save_perf_report: ${{ env.SAVE_PERF_REPORT }} pg_version: ${{ env.DEFAULT_PG_VERSION }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} # Set --sparse-ordering option of pytest-order plugin # to ensure tests are running in order of appears in the file. 
# It's important for test_perf_pgbench.py::test_pgbench_remote_* tests @@ -153,7 +153,7 @@ jobs: if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate with: - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} @@ -205,7 +205,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Run Logical Replication benchmarks uses: ./.github/actions/run-python-test-set @@ -216,7 +216,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 5400 pg_version: ${{ env.DEFAULT_PG_VERSION }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -233,7 +233,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 5400 pg_version: ${{ env.DEFAULT_PG_VERSION }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -245,7 +245,7 @@ jobs: uses: ./.github/actions/allure-report-generate with: store-test-results-into-db: true - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }} @@ -407,7 +407,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Create Neon Project if: contains(fromJson('["neonvm-captest-new", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform) @@ -455,7 +455,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init pg_version: ${{ env.DEFAULT_PG_VERSION }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" @@ -470,7 +470,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update pg_version: ${{ env.DEFAULT_PG_VERSION }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" @@ -485,7 +485,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only pg_version: ${{ env.DEFAULT_PG_VERSION }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" @@ -503,7 +503,7 @@ jobs: if: ${{ 
!cancelled() }} uses: ./.github/actions/allure-report-generate with: - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} @@ -614,7 +614,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing pg_version: ${{ env.DEFAULT_PG_VERSION }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -629,7 +629,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 pg_version: ${{ env.DEFAULT_PG_VERSION }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" @@ -640,7 +640,7 @@ jobs: if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate with: - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} @@ -711,7 +711,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Set up Connection String id: set-up-connstr @@ -743,7 +743,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 43200 -k test_clickbench pg_version: ${{ env.DEFAULT_PG_VERSION }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -757,7 +757,7 @@ jobs: if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate with: - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} @@ -822,7 +822,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Get Connstring Secret Name run: | @@ -861,7 +861,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_tpch pg_version: ${{ env.DEFAULT_PG_VERSION }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -873,7 +873,7 @@ jobs: if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate with: - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} @@ -931,7 +931,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + 
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Set up Connection String id: set-up-connstr @@ -963,7 +963,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_user_examples pg_version: ${{ env.DEFAULT_PG_VERSION }} - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -974,7 +974,7 @@ jobs: if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate with: - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index b3556debe3..55c4bf08b9 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -336,6 +336,7 @@ jobs: extra_params: --splits 5 --group ${{ matrix.pytest_split_group }} benchmark_durations: ${{ needs.get-benchmarks-durations.outputs.json }} pg_version: v16 + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -393,7 +394,7 @@ jobs: uses: ./.github/actions/allure-report-generate with: store-test-results-into-db: true - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }} @@ -455,14 +456,14 @@ jobs: with: name: neon-${{ runner.os }}-${{ runner.arch }}-${{ matrix.build_type }}-artifact path: /tmp/neon - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Get coverage artifact uses: ./.github/actions/download with: name: coverage-data-artifact path: /tmp/coverage - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Merge coverage data run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge @@ -1279,6 +1280,12 @@ jobs: echo "run-id=${run_id}" | tee -a ${GITHUB_OUTPUT} echo "commit-sha=${last_commit_sha}" | tee -a ${GITHUB_OUTPUT} + - uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + role-duration-seconds: 3600 + - name: Promote compatibility snapshot and Neon artifact env: BUCKET: neon-github-public-dev diff --git a/.github/workflows/cloud-regress.yml b/.github/workflows/cloud-regress.yml index 7b9e434ec3..55f42ea533 100644 --- a/.github/workflows/cloud-regress.yml +++ b/.github/workflows/cloud-regress.yml @@ -79,7 +79,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Create a new branch id: create-branch @@ -95,6 +95,7 @@ jobs: test_selection: cloud_regress pg_version: ${{matrix.pg-version}} extra_params: -m remote_cluster + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{steps.create-branch.outputs.dsn}} @@ -111,7 +112,7 @@ jobs: if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate with: - aws_oicd_role_arn: ${{ 
vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} diff --git a/.github/workflows/ingest_benchmark.yml b/.github/workflows/ingest_benchmark.yml index 6773032263..fc33c0a980 100644 --- a/.github/workflows/ingest_benchmark.yml +++ b/.github/workflows/ingest_benchmark.yml @@ -13,7 +13,7 @@ on: # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) - cron: '0 9 * * *' # run once a day, timezone is utc workflow_dispatch: # adds ability to run this manually - + defaults: run: shell: bash -euxo pipefail {0} @@ -28,7 +28,7 @@ jobs: strategy: fail-fast: false # allow other variants to continue even if one fails matrix: - target_project: [new_empty_project, large_existing_project] + target_project: [new_empty_project, large_existing_project] permissions: contents: write statuses: write @@ -56,7 +56,7 @@ jobs: with: aws-region: eu-central-1 role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role + role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role - name: Download Neon artifact uses: ./.github/actions/download @@ -64,7 +64,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Create Neon Project if: ${{ matrix.target_project == 'new_empty_project' }} @@ -95,7 +95,7 @@ jobs: project_id: ${{ vars.BENCHMARK_INGEST_TARGET_PROJECTID }} api_key: ${{ secrets.NEON_STAGING_API_KEY }} - - name: Initialize Neon project + - name: Initialize Neon project if: ${{ matrix.target_project == 'large_existing_project' }} env: BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-branch-ingest-target.outputs.dsn }} @@ -123,7 +123,7 @@ jobs: ${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;" echo "BENCHMARK_INGEST_TARGET_CONNSTR=${BENCHMARK_INGEST_TARGET_CONNSTR}" >> $GITHUB_ENV - - name: Invoke pgcopydb + - name: Invoke pgcopydb uses: ./.github/actions/run-python-test-set with: build_type: remote @@ -132,7 +132,7 @@ jobs: extra_params: -s -m remote_cluster --timeout 86400 -k test_ingest_performance_using_pgcopydb pg_version: v16 save_perf_report: true - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }} TARGET_PROJECT_TYPE: ${{ matrix.target_project }} @@ -144,7 +144,7 @@ jobs: run: | export LD_LIBRARY_PATH=${PG_16_LIB_PATH} ${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "\dt+" - + - name: Delete Neon Project if: ${{ always() && matrix.target_project == 'new_empty_project' }} uses: ./.github/actions/neon-project-delete diff --git a/.github/workflows/periodic_pagebench.yml b/.github/workflows/periodic_pagebench.yml index 049990f17b..af877029e4 100644 --- a/.github/workflows/periodic_pagebench.yml +++ b/.github/workflows/periodic_pagebench.yml @@ -137,7 +137,7 @@ jobs: if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate with: - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} diff --git a/.github/workflows/pg-clients.yml b/.github/workflows/pg-clients.yml index 
5c999d3810..4947907eb0 100644 --- a/.github/workflows/pg-clients.yml +++ b/.github/workflows/pg-clients.yml @@ -96,7 +96,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Create Neon Project id: create-neon-project @@ -113,6 +113,7 @@ jobs: run_in_parallel: false extra_params: -m remote_cluster pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }} @@ -129,7 +130,7 @@ jobs: uses: ./.github/actions/allure-report-generate with: store-test-results-into-db: true - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }} @@ -163,7 +164,7 @@ jobs: name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact path: /tmp/neon/ prefix: latest - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Create Neon Project id: create-neon-project @@ -180,6 +181,7 @@ jobs: run_in_parallel: false extra_params: -m remote_cluster pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }} @@ -196,7 +198,7 @@ jobs: uses: ./.github/actions/allure-report-generate with: store-test-results-into-db: true - aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }} From 2521eba6741f000d71c1f6e0d2d0b279c8465f47 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Fri, 13 Dec 2024 22:46:41 +0200 Subject: [PATCH 10/89] Check for invalid down link while prefetching B-Tree leave pages for index-only scan (#9867) ## Problem See #9866 Index-only scan prefetch implementation doesn't take in account that down link may be invalid ## Summary of changes Check that downlink is valid block number Correspondent Postgres PRs: https://github.com/neondatabase/postgres/pull/534 https://github.com/neondatabase/postgres/pull/535 https://github.com/neondatabase/postgres/pull/536 https://github.com/neondatabase/postgres/pull/537 --------- Co-authored-by: Konstantin Knizhnik --- vendor/postgres-v14 | 2 +- vendor/postgres-v15 | 2 +- vendor/postgres-v16 | 2 +- vendor/postgres-v17 | 2 +- vendor/revisions.json | 8 ++++---- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/vendor/postgres-v14 b/vendor/postgres-v14 index 13ff324150..c2f65b3201 160000 --- a/vendor/postgres-v14 +++ b/vendor/postgres-v14 @@ -1 +1 @@ -Subproject commit 13ff324150fceaac72920e01742addc053db9462 +Subproject commit c2f65b3201591e02ce45b66731392f98d3388e73 diff --git a/vendor/postgres-v15 b/vendor/postgres-v15 index 8736b10c1d..f262d631ad 160000 --- a/vendor/postgres-v15 +++ b/vendor/postgres-v15 @@ -1 +1 @@ -Subproject commit 8736b10c1d93d11b9c0489872dd529c4c0f5338f +Subproject commit f262d631ad477a1819e84a183e5a7ef561830085 diff --git a/vendor/postgres-v16 b/vendor/postgres-v16 index 81428621f7..97f9fde349 160000 --- a/vendor/postgres-v16 +++ b/vendor/postgres-v16 @@ -1 +1 @@ -Subproject commit 81428621f7c04aed03671cf80a928e0a36d92505 +Subproject commit 97f9fde349c6de6d573f5ce96db07eca60ce6185 diff --git a/vendor/postgres-v17 
b/vendor/postgres-v17 index 01fa3c4866..010c0ea2eb 160000 --- a/vendor/postgres-v17 +++ b/vendor/postgres-v17 @@ -1 +1 @@ -Subproject commit 01fa3c48664ca030cfb69bb4a350aa9df4691d88 +Subproject commit 010c0ea2eb06afe76485a33c43954cbcf3d99f86 diff --git a/vendor/revisions.json b/vendor/revisions.json index 7329aa437f..afcb922a5e 100644 --- a/vendor/revisions.json +++ b/vendor/revisions.json @@ -1,18 +1,18 @@ { "v17": [ "17.2", - "01fa3c48664ca030cfb69bb4a350aa9df4691d88" + "010c0ea2eb06afe76485a33c43954cbcf3d99f86" ], "v16": [ "16.6", - "81428621f7c04aed03671cf80a928e0a36d92505" + "97f9fde349c6de6d573f5ce96db07eca60ce6185" ], "v15": [ "15.10", - "8736b10c1d93d11b9c0489872dd529c4c0f5338f" + "f262d631ad477a1819e84a183e5a7ef561830085" ], "v14": [ "14.15", - "13ff324150fceaac72920e01742addc053db9462" + "c2f65b3201591e02ce45b66731392f98d3388e73" ] } From cf161e1556229dae17b5b73e03c3b5781f7bf952 Mon Sep 17 00:00:00 2001 From: Mikhail Kot Date: Sat, 14 Dec 2024 17:37:13 +0000 Subject: [PATCH 11/89] fix(adapter): password not set in role drop (#10130) ## Problem When entry was dropped and password wasn't set, new entry had uninitialized memory in controlplane adapter Resolves: https://github.com/neondatabase/cloud/issues/14914 ## Summary of changes Initialize password in all cases, add tests. Minor formatting for less indentation --- pgxn/neon/control_plane_connector.c | 39 ++++++++++------------ test_runner/regress/test_ddl_forwarding.py | 29 ++++++++++++---- 2 files changed, 39 insertions(+), 29 deletions(-) diff --git a/pgxn/neon/control_plane_connector.c b/pgxn/neon/control_plane_connector.c index b47b22cd20..59096a1bc8 100644 --- a/pgxn/neon/control_plane_connector.c +++ b/pgxn/neon/control_plane_connector.c @@ -428,6 +428,8 @@ MergeTable() hash_seq_init(&status, old_table->role_table); while ((entry = hash_seq_search(&status)) != NULL) { + RoleEntry * old; + bool found_old = false; RoleEntry *to_write = hash_search( CurrentDdlTable->role_table, entry->name, @@ -435,30 +437,23 @@ MergeTable() NULL); to_write->type = entry->type; - if (entry->password) - to_write->password = entry->password; + to_write->password = entry->password; strlcpy(to_write->old_name, entry->old_name, NAMEDATALEN); - if (entry->old_name[0] != '\0') - { - bool found_old = false; - RoleEntry *old = hash_search( - CurrentDdlTable->role_table, - entry->old_name, - HASH_FIND, - &found_old); + if (entry->old_name[0] == '\0') + continue; - if (found_old) - { - if (old->old_name[0] != '\0') - strlcpy(to_write->old_name, old->old_name, NAMEDATALEN); - else - strlcpy(to_write->old_name, entry->old_name, NAMEDATALEN); - hash_search(CurrentDdlTable->role_table, - entry->old_name, - HASH_REMOVE, - NULL); - } - } + old = hash_search( + CurrentDdlTable->role_table, + entry->old_name, + HASH_FIND, + &found_old); + if (!found_old) + continue; + strlcpy(to_write->old_name, old->old_name, NAMEDATALEN); + hash_search(CurrentDdlTable->role_table, + entry->old_name, + HASH_REMOVE, + NULL); } hash_destroy(old_table->role_table); } diff --git a/test_runner/regress/test_ddl_forwarding.py b/test_runner/regress/test_ddl_forwarding.py index de44bbcbc8..b10e38885e 100644 --- a/test_runner/regress/test_ddl_forwarding.py +++ b/test_runner/regress/test_ddl_forwarding.py @@ -60,14 +60,12 @@ def ddl_forward_handler( if request.json is None: log.info("Received invalid JSON") return Response(status=400) - json = request.json + json: dict[str, list[str]] = request.json # Handle roles first - if "roles" in json: - for operation in json["roles"]: - 
handle_role(dbs, roles, operation) - if "dbs" in json: - for operation in json["dbs"]: - handle_db(dbs, roles, operation) + for operation in json.get("roles", []): + handle_role(dbs, roles, operation) + for operation in json.get("dbs", []): + handle_db(dbs, roles, operation) return Response(status=200) @@ -207,6 +205,23 @@ def test_ddl_forwarding(ddl: DdlForwardingContext): ddl.wait() assert ddl.roles == {} + cur.execute("CREATE ROLE bork WITH PASSWORD 'newyork'") + cur.execute("BEGIN") + cur.execute("SAVEPOINT point") + cur.execute("DROP ROLE bork") + cur.execute("COMMIT") + ddl.wait() + assert ddl.roles == {} + + cur.execute("CREATE ROLE bork WITH PASSWORD 'oldyork'") + cur.execute("BEGIN") + cur.execute("SAVEPOINT point") + cur.execute("ALTER ROLE bork PASSWORD NULL") + cur.execute("COMMIT") + cur.execute("DROP ROLE bork") + ddl.wait() + assert ddl.roles == {} + cur.execute("CREATE ROLE bork WITH PASSWORD 'dork'") cur.execute("CREATE DATABASE stork WITH OWNER=bork") cur.execute("ALTER ROLE bork RENAME TO cork") From f3ecd5d76ad8b858b2bfaaabba5018046aca46ac Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Sun, 15 Dec 2024 10:45:12 +0100 Subject: [PATCH 12/89] pageserver: revert flush backpressure (#8550) (#10135) ## Problem In #8550, we made the flush loop wait for uploads after every layer. This was to avoid unbounded buildup of uploads, and to reduce compaction debt. However, the approach has several problems: * It prevents upload parallelism. * It prevents flush and upload pipelining. * It slows down ingestion even when there is no need to backpressure. * It does not directly backpressure WAL ingestion (only via `disk_consistent_lsn`), and will build up in-memory layers. * It does not directly backpressure based on compaction debt and read amplification. An alternative solution to these problems is proposed in #8390. In the meanwhile, we revert the change to reduce the impact on ingest throughput. This does reintroduce some risk of unbounded upload/compaction buildup. Until https://github.com/neondatabase/neon/issues/8390, this can be addressed in other ways: * Use `max_replication_apply_lag` (aka `remote_consistent_lsn`), which will more directly limit upload debt. * Shard the tenant, which will spread the flush/upload work across more Pageservers and move the bottleneck to Safekeeper. Touches #10095. ## Summary of changes Remove waiting on the upload queue in the flush loop. 
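
For context on the first mitigation listed above: the `neon` extension exposes backpressure GUCs on the compute, and `max_replication_apply_lag` is the one tied to `remote_consistent_lsn`. A minimal sketch of tightening it is below; the value, the unit handling, and the use of `ALTER SYSTEM` are assumptions for illustration only, not something this patch changes or requires.

```sql
-- Illustrative only: tighten apply-lag backpressure so ingest stalls on the
-- compute when the pageserver's remote_consistent_lsn falls too far behind,
-- which indirectly bounds upload debt. The chosen value is an arbitrary example.
ALTER SYSTEM SET max_replication_apply_lag = '1GB';
SELECT pg_reload_conf();

-- Check the effective setting.
SHOW max_replication_apply_lag;
```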
--- pageserver/src/metrics.rs | 25 +---------- pageserver/src/tenant/timeline.rs | 38 ++++------------- test_runner/fixtures/metrics.py | 1 - test_runner/regress/test_branching.py | 13 ++---- test_runner/regress/test_remote_storage.py | 48 ---------------------- 5 files changed, 13 insertions(+), 112 deletions(-) diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index b4e20cb8b9..bdbabf3f75 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -3,7 +3,7 @@ use metrics::{ register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec, register_int_counter, register_int_counter_pair_vec, register_int_counter_vec, register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec, - Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair, + Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair, IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec, }; use once_cell::sync::Lazy; @@ -445,15 +445,6 @@ pub(crate) static WAIT_LSN_TIME: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); -static FLUSH_WAIT_UPLOAD_TIME: Lazy = Lazy::new(|| { - register_gauge_vec!( - "pageserver_flush_wait_upload_seconds", - "Time spent waiting for preceding uploads during layer flush", - &["tenant_id", "shard_id", "timeline_id"] - ) - .expect("failed to define a metric") -}); - static LAST_RECORD_LSN: Lazy = Lazy::new(|| { register_int_gauge_vec!( "pageserver_last_record_lsn", @@ -2586,7 +2577,6 @@ pub(crate) struct TimelineMetrics { shard_id: String, timeline_id: String, pub flush_time_histo: StorageTimeMetrics, - pub flush_wait_upload_time_gauge: Gauge, pub compact_time_histo: StorageTimeMetrics, pub create_images_time_histo: StorageTimeMetrics, pub logical_size_histo: StorageTimeMetrics, @@ -2632,9 +2622,6 @@ impl TimelineMetrics { &shard_id, &timeline_id, ); - let flush_wait_upload_time_gauge = FLUSH_WAIT_UPLOAD_TIME - .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) - .unwrap(); let compact_time_histo = StorageTimeMetrics::new( StorageTimeOperation::Compact, &tenant_id, @@ -2780,7 +2767,6 @@ impl TimelineMetrics { shard_id, timeline_id, flush_time_histo, - flush_wait_upload_time_gauge, compact_time_histo, create_images_time_histo, logical_size_histo, @@ -2830,14 +2816,6 @@ impl TimelineMetrics { self.resident_physical_size_gauge.get() } - pub(crate) fn flush_wait_upload_time_gauge_add(&self, duration: f64) { - self.flush_wait_upload_time_gauge.add(duration); - crate::metrics::FLUSH_WAIT_UPLOAD_TIME - .get_metric_with_label_values(&[&self.tenant_id, &self.shard_id, &self.timeline_id]) - .unwrap() - .add(duration); - } - pub(crate) fn shutdown(&self) { let was_shutdown = self .shutdown @@ -2855,7 +2833,6 @@ impl TimelineMetrics { let shard_id = &self.shard_id; let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]); let _ = DISK_CONSISTENT_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]); - let _ = FLUSH_WAIT_UPLOAD_TIME.remove_label_values(&[tenant_id, shard_id, timeline_id]); let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]); { RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get()); diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 75e268a1b9..0416953c1f 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -144,19 +144,15 @@ use 
self::layer_manager::LayerManager; use self::logical_size::LogicalSize; use self::walreceiver::{WalReceiver, WalReceiverConf}; +use super::config::TenantConf; +use super::remote_timeline_client::index::IndexPart; +use super::remote_timeline_client::RemoteTimelineClient; +use super::secondary::heatmap::{HeatMapLayer, HeatMapTimeline}; +use super::storage_layer::{LayerFringe, LayerVisibilityHint, ReadableLayer}; +use super::upload_queue::NotInitialized; +use super::GcError; use super::{ - config::TenantConf, storage_layer::LayerVisibilityHint, upload_queue::NotInitialized, - MaybeOffloaded, -}; -use super::{debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf}; -use super::{remote_timeline_client::index::IndexPart, storage_layer::LayerFringe}; -use super::{ - remote_timeline_client::RemoteTimelineClient, remote_timeline_client::WaitCompletionError, - storage_layer::ReadableLayer, -}; -use super::{ - secondary::heatmap::{HeatMapLayer, HeatMapTimeline}, - GcError, + debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf, MaybeOffloaded, }; #[cfg(test)] @@ -3897,24 +3893,6 @@ impl Timeline { // release lock on 'layers' }; - // Backpressure mechanism: wait with continuation of the flush loop until we have uploaded all layer files. - // This makes us refuse ingest until the new layers have been persisted to the remote - let start = Instant::now(); - self.remote_client - .wait_completion() - .await - .map_err(|e| match e { - WaitCompletionError::UploadQueueShutDownOrStopped - | WaitCompletionError::NotInitialized( - NotInitialized::ShuttingDown | NotInitialized::Stopped, - ) => FlushLayerError::Cancelled, - WaitCompletionError::NotInitialized(NotInitialized::Uninitialized) => { - FlushLayerError::Other(anyhow!(e).into()) - } - })?; - let duration = start.elapsed().as_secs_f64(); - self.metrics.flush_wait_upload_time_gauge_add(duration); - // FIXME: between create_delta_layer and the scheduling of the upload in `update_metadata_file`, // a compaction can delete the file and then it won't be available for uploads any more. // We still schedule the upload, resulting in an error, but ideally we'd somehow avoid this diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index c5295360c3..eb3d06b949 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -170,7 +170,6 @@ PAGESERVER_PER_TENANT_METRICS: tuple[str, ...] 
= ( "pageserver_evictions_with_low_residence_duration_total", "pageserver_aux_file_estimated_size", "pageserver_valid_lsn_lease_count", - "pageserver_flush_wait_upload_seconds", counter("pageserver_tenant_throttling_count_accounted_start"), counter("pageserver_tenant_throttling_count_accounted_finish"), counter("pageserver_tenant_throttling_wait_usecs_sum"), diff --git a/test_runner/regress/test_branching.py b/test_runner/regress/test_branching.py index 34e4e994cb..a4056404f0 100644 --- a/test_runner/regress/test_branching.py +++ b/test_runner/regress/test_branching.py @@ -19,6 +19,7 @@ from fixtures.pageserver.utils import wait_until_tenant_active from fixtures.utils import query_scalar from performance.test_perf_pgbench import get_scales_matrix from requests import RequestException +from requests.exceptions import RetryError # Test branch creation @@ -176,11 +177,8 @@ def test_cannot_create_endpoint_on_non_uploaded_timeline(neon_env_builder: NeonE env.neon_cli.mappings_map_branch(initial_branch, env.initial_tenant, env.initial_timeline) - with pytest.raises(RuntimeError, match="ERROR: Not found: Timeline"): - env.endpoints.create_start( - initial_branch, tenant_id=env.initial_tenant, basebackup_request_tries=2 - ) - ps_http.configure_failpoints(("before-upload-index-pausable", "off")) + with pytest.raises(RuntimeError, match="is not active, state: Loading"): + env.endpoints.create_start(initial_branch, tenant_id=env.initial_tenant) finally: env.pageserver.stop(immediate=True) @@ -221,10 +219,7 @@ def test_cannot_branch_from_non_uploaded_branch(neon_env_builder: NeonEnvBuilder branch_id = TimelineId.generate() - with pytest.raises( - PageserverApiException, - match="Cannot branch off the timeline that's not present in pageserver", - ): + with pytest.raises(RetryError, match="too many 503 error responses"): ps_http.timeline_create( env.pg_version, env.initial_tenant, diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py index 76a42ef4a2..52b6b254aa 100644 --- a/test_runner/regress/test_remote_storage.py +++ b/test_runner/regress/test_remote_storage.py @@ -784,54 +784,6 @@ def test_empty_branch_remote_storage_upload_on_restart(neon_env_builder: NeonEnv create_thread.join() -def test_paused_upload_stalls_checkpoint( - neon_env_builder: NeonEnvBuilder, -): - """ - This test checks that checkpoints block on uploads to remote storage. - """ - neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS) - - env = neon_env_builder.init_start( - initial_tenant_conf={ - # Set a small compaction threshold - "compaction_threshold": "3", - # Disable GC - "gc_period": "0s", - # disable PITR - "pitr_interval": "0s", - } - ) - - env.pageserver.allowed_errors.append( - f".*PUT.* path=/v1/tenant/{env.initial_tenant}/timeline.* request was dropped before completing" - ) - - tenant_id = env.initial_tenant - timeline_id = env.initial_timeline - - client = env.pageserver.http_client() - layers_at_creation = client.layer_map_info(tenant_id, timeline_id) - deltas_at_creation = len(layers_at_creation.delta_layers()) - assert ( - deltas_at_creation == 1 - ), "are you fixing #5863? make sure we end up with 2 deltas at the end of endpoint lifecycle" - - # Make new layer uploads get stuck. - # Note that timeline creation waits for the initial layers to reach remote storage. - # So at this point, the `layers_at_creation` are in remote storage. 
- client.configure_failpoints(("before-upload-layer-pausable", "pause")) - - with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint: - # Build two tables with some data inside - endpoint.safe_psql("CREATE TABLE foo AS SELECT x FROM generate_series(1, 10000) g(x)") - wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) - - with pytest.raises(ReadTimeout): - client.timeline_checkpoint(tenant_id, timeline_id, timeout=5) - client.configure_failpoints(("before-upload-layer-pausable", "off")) - - def wait_upload_queue_empty( client: PageserverHttpClient, tenant_id: TenantId, timeline_id: TimelineId ): From 117c1b5ddec110677f06dbc769712a722b55f2d3 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Mon, 16 Dec 2024 08:03:53 +0200 Subject: [PATCH 13/89] Do not perform prefetch for temp relations (#10146) ## Problem See https://neondb.slack.com/archives/C04DGM6SMTM/p1734002916827019 With recent prefetch fixes for pg17 and `effective_io_concurrency=100` pg_regress test stats.sql is failed when set temp_buffers to 100. Stream API will try to lock all this 100 buffers for prefetch. ## Summary of changes Disable such behaviour for temp relations. Postgres PR: https://github.com/neondatabase/postgres/pull/548 Co-authored-by: Konstantin Knizhnik --- vendor/postgres-v17 | 2 +- vendor/revisions.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/vendor/postgres-v17 b/vendor/postgres-v17 index 010c0ea2eb..65c4e46baf 160000 --- a/vendor/postgres-v17 +++ b/vendor/postgres-v17 @@ -1 +1 @@ -Subproject commit 010c0ea2eb06afe76485a33c43954cbcf3d99f86 +Subproject commit 65c4e46baf56ec05412c7dd63d62faff0b33dcfb diff --git a/vendor/revisions.json b/vendor/revisions.json index afcb922a5e..c8db81c73f 100644 --- a/vendor/revisions.json +++ b/vendor/revisions.json @@ -1,7 +1,7 @@ { "v17": [ "17.2", - "010c0ea2eb06afe76485a33c43954cbcf3d99f86" + "65c4e46baf56ec05412c7dd63d62faff0b33dcfb" ], "v16": [ "16.6", From ebcbc1a4822c566a865ca2dd9138e5af2838c794 Mon Sep 17 00:00:00 2001 From: John Spray Date: Mon, 16 Dec 2024 10:06:08 +0000 Subject: [PATCH 14/89] pageserver: tighten up code around SLRU dir key handling (#10082) ## Problem Changes in #9786 were functionally complete but missed some edges that made testing less robust than it should have been: - `is_key_disposable` didn't consider SLRU dir keys disposable - Timeline `init_empty` was always creating SLRU dir keys on all shards The result was that when we had a bug (https://github.com/neondatabase/neon/pull/10080), it wasn't apparent in tests, because one would only encounter the issue if running on a long-lived timeline with enough compaction to drop the initially created empty SLRU dir keys, _and_ some CLog truncation going on. Closes: https://github.com/neondatabase/cloud/issues/21516 ## Summary of changes - Update is_key_global and init_empty to handle SLRU dir keys properly -- the only functional impact is that we avoid writing some spurious keys in shards >0, but this makes testing much more robust. - Make `test_clog_truncate` explicitly use a sharded tenant The net result is that if one reverts #10080, then tests fail (i.e. 
this PR is a reproducer for the issue) --- libs/pageserver_api/src/key.rs | 4 ++++ libs/pageserver_api/src/shard.rs | 6 ++++- pageserver/src/pgdatadir_mapping.rs | 29 +++++++++++++---------- test_runner/regress/test_clog_truncate.py | 24 ++++++++++++------- 4 files changed, 42 insertions(+), 21 deletions(-) diff --git a/libs/pageserver_api/src/key.rs b/libs/pageserver_api/src/key.rs index 373329c9b4..f0cd713c38 100644 --- a/libs/pageserver_api/src/key.rs +++ b/libs/pageserver_api/src/key.rs @@ -565,6 +565,10 @@ impl Key { && self.field5 == 0 && self.field6 == u32::MAX } + + pub fn is_slru_dir_key(&self) -> bool { + slru_dir_kind(self).is_some() + } } #[inline(always)] diff --git a/libs/pageserver_api/src/shard.rs b/libs/pageserver_api/src/shard.rs index cf0cd3a46b..4cc0a739e8 100644 --- a/libs/pageserver_api/src/shard.rs +++ b/libs/pageserver_api/src/shard.rs @@ -173,7 +173,11 @@ impl ShardIdentity { /// Return true if the key should be stored on all shards, not just one. pub fn is_key_global(&self, key: &Key) -> bool { - if key.is_slru_block_key() || key.is_slru_segment_size_key() || key.is_aux_file_key() { + if key.is_slru_block_key() + || key.is_slru_segment_size_key() + || key.is_aux_file_key() + || key.is_slru_dir_key() + { // Special keys that are only stored on shard 0 false } else if key.is_rel_block_key() { diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 255bd01e25..3eaecd3a08 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -1319,18 +1319,23 @@ impl<'a> DatadirModification<'a> { let buf: Bytes = SlruSegmentDirectory::ser(&SlruSegmentDirectory::default())?.into(); let empty_dir = Value::Image(buf); - self.put(slru_dir_to_key(SlruKind::Clog), empty_dir.clone()); - self.pending_directory_entries - .push((DirectoryKind::SlruSegment(SlruKind::Clog), 0)); - self.put( - slru_dir_to_key(SlruKind::MultiXactMembers), - empty_dir.clone(), - ); - self.pending_directory_entries - .push((DirectoryKind::SlruSegment(SlruKind::Clog), 0)); - self.put(slru_dir_to_key(SlruKind::MultiXactOffsets), empty_dir); - self.pending_directory_entries - .push((DirectoryKind::SlruSegment(SlruKind::MultiXactOffsets), 0)); + + // Initialize SLRUs on shard 0 only: creating these on other shards would be + // harmless but they'd just be dropped on later compaction. 
+ if self.tline.tenant_shard_id.is_shard_zero() { + self.put(slru_dir_to_key(SlruKind::Clog), empty_dir.clone()); + self.pending_directory_entries + .push((DirectoryKind::SlruSegment(SlruKind::Clog), 0)); + self.put( + slru_dir_to_key(SlruKind::MultiXactMembers), + empty_dir.clone(), + ); + self.pending_directory_entries + .push((DirectoryKind::SlruSegment(SlruKind::Clog), 0)); + self.put(slru_dir_to_key(SlruKind::MultiXactOffsets), empty_dir); + self.pending_directory_entries + .push((DirectoryKind::SlruSegment(SlruKind::MultiXactOffsets), 0)); + } Ok(()) } diff --git a/test_runner/regress/test_clog_truncate.py b/test_runner/regress/test_clog_truncate.py index 10027ce689..2ae38e6d88 100644 --- a/test_runner/regress/test_clog_truncate.py +++ b/test_runner/regress/test_clog_truncate.py @@ -1,18 +1,19 @@ from __future__ import annotations import os -import time from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv -from fixtures.utils import query_scalar +from fixtures.neon_fixtures import NeonEnvBuilder +from fixtures.utils import query_scalar, wait_until # # Test compute node start after clog truncation # -def test_clog_truncate(neon_simple_env: NeonEnv): - env = neon_simple_env +def test_clog_truncate(neon_env_builder: NeonEnvBuilder): + # Use a multi-sharded tenant because WAL ingest logic is shard-dependent, and + # this test is one of the very few that exercises a CLogTruncate WAL record. + env = neon_env_builder.init_start(initial_tenant_shard_count=2) # set aggressive autovacuum to make sure that truncation will happen config = [ @@ -31,6 +32,7 @@ def test_clog_truncate(neon_simple_env: NeonEnv): endpoint.safe_psql("CREATE EXTENSION neon_test_utils") # Consume many xids to advance clog + log.info("Consuming xids...") with endpoint.cursor() as cur: cur.execute("select test_consume_xids(1000*1000*10);") log.info("xids consumed") @@ -47,11 +49,17 @@ def test_clog_truncate(neon_simple_env: NeonEnv): pg_xact_0000_path = os.path.join(endpoint.pg_xact_dir_path(), "0000") log.info(f"pg_xact_0000_path = {pg_xact_0000_path}") - while os.path.isfile(pg_xact_0000_path): - log.info(f"file exists. wait for truncation: {pg_xact_0000_path=}") - time.sleep(5) + def assert_file_removed(): + exists = os.path.isfile(pg_xact_0000_path) + if exists: + log.info(f"file exists. wait for truncation: {pg_xact_0000_path=}") + assert not exists + + log.info("Waiting for truncation...") + wait_until(assert_file_removed) # checkpoint to advance latest lsn + log.info("Checkpointing...") with endpoint.cursor() as cur: cur.execute("CHECKPOINT;") lsn_after_truncation = query_scalar(cur, "select pg_current_wal_insert_lsn()") From 24d658791410d81d9b854ba1c8036068d6467bc2 Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Mon, 16 Dec 2024 11:15:25 +0000 Subject: [PATCH 15/89] chore(proxy): refactor self-signed config (#10154) ## Problem While reviewing #10152 I found it tricky to actually determine whether the connection used `allow_self_signed_compute` or not. I've tried to remove this setting in the past: * https://github.com/neondatabase/neon/pull/7884 * https://github.com/neondatabase/neon/pull/7437 * https://github.com/neondatabase/cloud/pull/13702 But each time it seems it is used by e2e tests ## Summary of changes The `node_info.allow_self_signed_computes` is always initialised to false, and then sometimes inherits the proxy config value. There's no need this needs to be in the node_info, so removing it and propagating it via `TcpMechansim` is simpler. 
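
A reduced sketch of that shape (toy types only; the real `NodeInfo` and `TcpMechanism` carry far more state and the real `connect` builds a TLS config): the testing-only flag now lives on the connect mechanism supplied by the caller, so the cached `NodeInfo` no longer carries a per-connection setting that has to be patched in before use.

```rust
struct NodeInfo {
    host: String, // cached control-plane data only; no per-connection flags
}

struct TcpMechanism {
    allow_self_signed_compute: bool, // testing-only knob, owned by the call site
}

impl NodeInfo {
    fn connect(&self, allow_self_signed_compute: bool) {
        // Here we only show where the flag is consumed.
        println!(
            "connecting to {} (accept self-signed: {allow_self_signed_compute})",
            self.host
        );
    }
}

impl TcpMechanism {
    fn connect_once(&self, node: &NodeInfo) {
        // Before this change the flag was copied onto NodeInfo first;
        // now it is passed explicitly at the point of use.
        node.connect(self.allow_self_signed_compute);
    }
}

fn main() {
    let node = NodeInfo { host: "compute.example.invalid".into() };
    let mech = TcpMechanism { allow_self_signed_compute: false };
    mech.connect_once(&node);
}
```

This makes it obvious at each call site whether self-signed compute certificates are accepted, which is the property that was hard to see before.
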
--- proxy/src/auth/backend/console_redirect.rs | 1 - proxy/src/auth/backend/local.rs | 1 - proxy/src/console_redirect_proxy.rs | 2 +- proxy/src/control_plane/client/cplane_proxy_v1.rs | 1 - proxy/src/control_plane/client/mock.rs | 1 - proxy/src/control_plane/mod.rs | 13 +++---------- proxy/src/proxy/connect_compute.rs | 11 ++++++++--- proxy/src/proxy/mod.rs | 2 +- proxy/src/proxy/tests/mod.rs | 14 ++++++-------- proxy/src/redis/notifications.rs | 2 +- proxy/src/serverless/backend.rs | 2 -- 11 files changed, 20 insertions(+), 30 deletions(-) diff --git a/proxy/src/auth/backend/console_redirect.rs b/proxy/src/auth/backend/console_redirect.rs index 575d60be85..c3de77b352 100644 --- a/proxy/src/auth/backend/console_redirect.rs +++ b/proxy/src/auth/backend/console_redirect.rs @@ -187,7 +187,6 @@ async fn authenticate( NodeInfo { config, aux: db_info.aux, - allow_self_signed_compute: false, // caller may override }, db_info.allowed_ips, )) diff --git a/proxy/src/auth/backend/local.rs b/proxy/src/auth/backend/local.rs index d4273fb521..d10f0e82b2 100644 --- a/proxy/src/auth/backend/local.rs +++ b/proxy/src/auth/backend/local.rs @@ -37,7 +37,6 @@ impl LocalBackend { branch_id: BranchIdTag::get_interner().get_or_intern("local"), cold_start_info: ColdStartInfo::WarmCached, }, - allow_self_signed_compute: false, }, } } diff --git a/proxy/src/console_redirect_proxy.rs b/proxy/src/console_redirect_proxy.rs index 65702e0e4c..02398fb777 100644 --- a/proxy/src/console_redirect_proxy.rs +++ b/proxy/src/console_redirect_proxy.rs @@ -213,9 +213,9 @@ pub(crate) async fn handle_client( params_compat: true, params: ¶ms, locks: &config.connect_compute_locks, + allow_self_signed_compute: config.allow_self_signed_compute, }, &user_info, - config.allow_self_signed_compute, config.wake_compute_retry_config, config.connect_to_compute_retry_config, ) diff --git a/proxy/src/control_plane/client/cplane_proxy_v1.rs b/proxy/src/control_plane/client/cplane_proxy_v1.rs index e33a37f643..00038a6ac6 100644 --- a/proxy/src/control_plane/client/cplane_proxy_v1.rs +++ b/proxy/src/control_plane/client/cplane_proxy_v1.rs @@ -250,7 +250,6 @@ impl NeonControlPlaneClient { let node = NodeInfo { config, aux: body.aux, - allow_self_signed_compute: false, }; Ok(node) diff --git a/proxy/src/control_plane/client/mock.rs b/proxy/src/control_plane/client/mock.rs index eaf692ab27..93edd65476 100644 --- a/proxy/src/control_plane/client/mock.rs +++ b/proxy/src/control_plane/client/mock.rs @@ -174,7 +174,6 @@ impl MockControlPlane { branch_id: (&BranchId::from("branch")).into(), cold_start_info: crate::control_plane::messages::ColdStartInfo::Warm, }, - allow_self_signed_compute: false, }; Ok(node) diff --git a/proxy/src/control_plane/mod.rs b/proxy/src/control_plane/mod.rs index 41972e4e44..c0718920b4 100644 --- a/proxy/src/control_plane/mod.rs +++ b/proxy/src/control_plane/mod.rs @@ -67,28 +67,21 @@ pub(crate) struct NodeInfo { /// Labels for proxy's metrics. 
pub(crate) aux: MetricsAuxInfo, - - /// Whether we should accept self-signed certificates (for testing) - pub(crate) allow_self_signed_compute: bool, } impl NodeInfo { pub(crate) async fn connect( &self, ctx: &RequestContext, + allow_self_signed_compute: bool, timeout: Duration, ) -> Result { self.config - .connect( - ctx, - self.allow_self_signed_compute, - self.aux.clone(), - timeout, - ) + .connect(ctx, allow_self_signed_compute, self.aux.clone(), timeout) .await } + pub(crate) fn reuse_settings(&mut self, other: Self) { - self.allow_self_signed_compute = other.allow_self_signed_compute; self.config.reuse_password(other.config); } diff --git a/proxy/src/proxy/connect_compute.rs b/proxy/src/proxy/connect_compute.rs index a3027abd7c..6da4c90a53 100644 --- a/proxy/src/proxy/connect_compute.rs +++ b/proxy/src/proxy/connect_compute.rs @@ -73,6 +73,9 @@ pub(crate) struct TcpMechanism<'a> { /// connect_to_compute concurrency lock pub(crate) locks: &'static ApiLocks, + + /// Whether we should accept self-signed certificates (for testing) + pub(crate) allow_self_signed_compute: bool, } #[async_trait] @@ -90,7 +93,11 @@ impl ConnectMechanism for TcpMechanism<'_> { ) -> Result { let host = node_info.config.get_host(); let permit = self.locks.get_permit(&host).await?; - permit.release_result(node_info.connect(ctx, timeout).await) + permit.release_result( + node_info + .connect(ctx, self.allow_self_signed_compute, timeout) + .await, + ) } fn update_connect_config(&self, config: &mut compute::ConnCfg) { @@ -104,7 +111,6 @@ pub(crate) async fn connect_to_compute Result @@ -117,7 +123,6 @@ where wake_compute(&mut num_retries, ctx, user_info, wake_compute_retry_config).await?; node_info.set_keys(user_info.get_keys()); - node_info.allow_self_signed_compute = allow_self_signed_compute; mechanism.update_connect_config(&mut node_info.config); // try once diff --git a/proxy/src/proxy/mod.rs b/proxy/src/proxy/mod.rs index cc04bc5e5c..de0ec0f799 100644 --- a/proxy/src/proxy/mod.rs +++ b/proxy/src/proxy/mod.rs @@ -355,9 +355,9 @@ pub(crate) async fn handle_client( params_compat, params: ¶ms, locks: &config.connect_compute_locks, + allow_self_signed_compute: mode.allow_self_signed_compute(config), }, &user_info, - mode.allow_self_signed_compute(config), config.wake_compute_retry_config, config.connect_to_compute_retry_config, ) diff --git a/proxy/src/proxy/tests/mod.rs b/proxy/src/proxy/tests/mod.rs index 911b349416..3899ba4267 100644 --- a/proxy/src/proxy/tests/mod.rs +++ b/proxy/src/proxy/tests/mod.rs @@ -553,7 +553,6 @@ fn helper_create_cached_node_info(cache: &'static NodeInfoCache) -> CachedNodeIn branch_id: (&BranchId::from("branch")).into(), cold_start_info: crate::control_plane::messages::ColdStartInfo::Warm, }, - allow_self_signed_compute: false, }; let (_, node2) = cache.insert_unit("key".into(), Ok(node.clone())); node2.map(|()| node) @@ -588,7 +587,7 @@ async fn connect_to_compute_success() { max_retries: 5, backoff_factor: 2.0, }; - connect_to_compute(&ctx, &mechanism, &user_info, false, config, config) + connect_to_compute(&ctx, &mechanism, &user_info, config, config) .await .unwrap(); mechanism.verify(); @@ -606,7 +605,7 @@ async fn connect_to_compute_retry() { max_retries: 5, backoff_factor: 2.0, }; - connect_to_compute(&ctx, &mechanism, &user_info, false, config, config) + connect_to_compute(&ctx, &mechanism, &user_info, config, config) .await .unwrap(); mechanism.verify(); @@ -625,7 +624,7 @@ async fn connect_to_compute_non_retry_1() { max_retries: 5, backoff_factor: 2.0, }; - 
connect_to_compute(&ctx, &mechanism, &user_info, false, config, config) + connect_to_compute(&ctx, &mechanism, &user_info, config, config) .await .unwrap_err(); mechanism.verify(); @@ -644,7 +643,7 @@ async fn connect_to_compute_non_retry_2() { max_retries: 5, backoff_factor: 2.0, }; - connect_to_compute(&ctx, &mechanism, &user_info, false, config, config) + connect_to_compute(&ctx, &mechanism, &user_info, config, config) .await .unwrap(); mechanism.verify(); @@ -674,7 +673,6 @@ async fn connect_to_compute_non_retry_3() { &ctx, &mechanism, &user_info, - false, wake_compute_retry_config, connect_to_compute_retry_config, ) @@ -696,7 +694,7 @@ async fn wake_retry() { max_retries: 5, backoff_factor: 2.0, }; - connect_to_compute(&ctx, &mechanism, &user_info, false, config, config) + connect_to_compute(&ctx, &mechanism, &user_info, config, config) .await .unwrap(); mechanism.verify(); @@ -715,7 +713,7 @@ async fn wake_non_retry() { max_retries: 5, backoff_factor: 2.0, }; - connect_to_compute(&ctx, &mechanism, &user_info, false, config, config) + connect_to_compute(&ctx, &mechanism, &user_info, config, config) .await .unwrap_err(); mechanism.verify(); diff --git a/proxy/src/redis/notifications.rs b/proxy/src/redis/notifications.rs index f3aa97c032..d18dfd2465 100644 --- a/proxy/src/redis/notifications.rs +++ b/proxy/src/redis/notifications.rs @@ -6,6 +6,7 @@ use pq_proto::CancelKeyData; use redis::aio::PubSub; use serde::{Deserialize, Serialize}; use tokio_util::sync::CancellationToken; +use tracing::Instrument; use uuid::Uuid; use super::connection_with_credentials_provider::ConnectionWithCredentialsProvider; @@ -13,7 +14,6 @@ use crate::cache::project_info::ProjectInfoCache; use crate::cancellation::{CancelMap, CancellationHandler}; use crate::intern::{ProjectIdInt, RoleNameInt}; use crate::metrics::{Metrics, RedisErrors, RedisEventsCount}; -use tracing::Instrument; const CPLANE_CHANNEL_NAME: &str = "neondb-proxy-ws-updates"; pub(crate) const PROXY_CHANNEL_NAME: &str = "neondb-proxy-to-proxy-updates"; diff --git a/proxy/src/serverless/backend.rs b/proxy/src/serverless/backend.rs index 251aa47084..15d883bdb0 100644 --- a/proxy/src/serverless/backend.rs +++ b/proxy/src/serverless/backend.rs @@ -195,7 +195,6 @@ impl PoolingBackend { locks: &self.config.connect_compute_locks, }, &backend, - false, // do not allow self signed compute for http flow self.config.wake_compute_retry_config, self.config.connect_to_compute_retry_config, ) @@ -237,7 +236,6 @@ impl PoolingBackend { locks: &self.config.connect_compute_locks, }, &backend, - false, // do not allow self signed compute for http flow self.config.wake_compute_retry_config, self.config.connect_to_compute_retry_config, ) From 1ed0e52bc837fd8eb356847e452444b5df1f4b0b Mon Sep 17 00:00:00 2001 From: Arseny Sher Date: Mon, 16 Dec 2024 15:07:24 +0300 Subject: [PATCH 16/89] Extract safekeeper http client to separate crate. (#10140) ## Problem We want to use safekeeper http client in storage controller and neon_local. ## Summary of changes Extract it to separate crate. No functional changes. 
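
For future consumers the intended usage pattern looks roughly like the sketch below. This is illustrative only: the `Client` here is a toy stand-in defined inside the sketch, and the method name, URL shape, port and host are placeholders rather than the real `mgmt_api` surface; only the general idea (a thin HTTP client crate constructed from a safekeeper base URL plus an optional auth token, usable from `storage_controller` or `neon_local` via the new workspace dependency) comes from the patch.

```rust
// Toy stand-in for the extracted client; not the real safekeeper_client API.
#[derive(Clone)]
pub struct Client {
    base_url: String,
    auth_token: Option<String>,
}

impl Client {
    pub fn new(base_url: String, auth_token: Option<String>) -> Self {
        Self { base_url, auth_token }
    }

    /// Placeholder for an mgmt_api-style endpoint, e.g. timeline status.
    pub fn timeline_status_url(&self, tenant: &str, timeline: &str) -> String {
        format!("{}/v1/tenant/{tenant}/timeline/{timeline}", self.base_url)
    }
}

fn main() {
    // A consumer would construct the client from a safekeeper base URL and an
    // optional JWT, then issue management-API requests against it.
    let client = Client::new("http://sk-0.local:7676".into(), None);
    println!("GET {}", client.timeline_status_url("some-tenant", "some-timeline"));
    let _ = &client.auth_token; // the real client attaches this as a bearer token
}
```
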
--- Cargo.lock | 13 +++++++++++++ Cargo.toml | 2 ++ safekeeper/Cargo.toml | 1 + safekeeper/client/Cargo.toml | 13 +++++++++++++ safekeeper/client/src/lib.rs | 1 + .../{src/http/client.rs => client/src/mgmt_api.rs} | 4 ---- safekeeper/src/http/mod.rs | 1 - safekeeper/src/pull_timeline.rs | 5 +++-- 8 files changed, 33 insertions(+), 7 deletions(-) create mode 100644 safekeeper/client/Cargo.toml create mode 100644 safekeeper/client/src/lib.rs rename safekeeper/{src/http/client.rs => client/src/mgmt_api.rs} (96%) diff --git a/Cargo.lock b/Cargo.lock index c4f80f63c9..d1f7746969 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5535,6 +5535,7 @@ dependencies = [ "remote_storage", "reqwest", "safekeeper_api", + "safekeeper_client", "scopeguard", "sd-notify", "serde", @@ -5572,6 +5573,18 @@ dependencies = [ "utils", ] +[[package]] +name = "safekeeper_client" +version = "0.1.0" +dependencies = [ + "reqwest", + "safekeeper_api", + "serde", + "thiserror", + "utils", + "workspace_hack", +] + [[package]] name = "same-file" version = "1.0.6" diff --git a/Cargo.toml b/Cargo.toml index 0654c25a3d..056cd5798f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ members = [ "pageserver/pagebench", "proxy", "safekeeper", + "safekeeper/client", "storage_broker", "storage_controller", "storage_controller/client", @@ -233,6 +234,7 @@ postgres_initdb = { path = "./libs/postgres_initdb" } pq_proto = { version = "0.1", path = "./libs/pq_proto/" } remote_storage = { version = "0.1", path = "./libs/remote_storage/" } safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" } +safekeeper_client = { path = "./safekeeper/client" } desim = { version = "0.1", path = "./libs/desim" } storage_broker = { version = "0.1", path = "./storage_broker/" } # Note: main broker code is inside the binary crate, so linking with the library shouldn't be heavy. storage_controller_client = { path = "./storage_controller/client" } diff --git a/safekeeper/Cargo.toml b/safekeeper/Cargo.toml index 0422c46ab1..086407603f 100644 --- a/safekeeper/Cargo.toml +++ b/safekeeper/Cargo.toml @@ -55,6 +55,7 @@ postgres_ffi.workspace = true pq_proto.workspace = true remote_storage.workspace = true safekeeper_api.workspace = true +safekeeper_client.workspace = true sha2.workspace = true sd-notify.workspace = true storage_broker.workspace = true diff --git a/safekeeper/client/Cargo.toml b/safekeeper/client/Cargo.toml new file mode 100644 index 0000000000..6c5a52de3a --- /dev/null +++ b/safekeeper/client/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "safekeeper_client" +version = "0.1.0" +edition.workspace = true +license.workspace = true + +[dependencies] +safekeeper_api.workspace = true +thiserror.workspace = true +reqwest = { workspace = true, features = [ "stream" ] } +serde.workspace = true +utils.workspace = true +workspace_hack = { version = "0.1", path = "../../workspace_hack" } diff --git a/safekeeper/client/src/lib.rs b/safekeeper/client/src/lib.rs new file mode 100644 index 0000000000..3963fd466c --- /dev/null +++ b/safekeeper/client/src/lib.rs @@ -0,0 +1 @@ +pub mod mgmt_api; diff --git a/safekeeper/src/http/client.rs b/safekeeper/client/src/mgmt_api.rs similarity index 96% rename from safekeeper/src/http/client.rs rename to safekeeper/client/src/mgmt_api.rs index 669a9c0ce9..f78745043a 100644 --- a/safekeeper/src/http/client.rs +++ b/safekeeper/client/src/mgmt_api.rs @@ -2,10 +2,6 @@ //! //! Partially copied from pageserver client; some parts might be better to be //! united. -//! -//! 
It would be also good to move it out to separate crate, but this needs -//! duplication of internal-but-reported structs like WalSenderState, ServerInfo -//! etc. use reqwest::{IntoUrl, Method, StatusCode}; use safekeeper_api::models::TimelineStatus; diff --git a/safekeeper/src/http/mod.rs b/safekeeper/src/http/mod.rs index 7229ccb739..d82a713f8a 100644 --- a/safekeeper/src/http/mod.rs +++ b/safekeeper/src/http/mod.rs @@ -1,4 +1,3 @@ -pub mod client; pub mod routes; pub use routes::make_router; diff --git a/safekeeper/src/pull_timeline.rs b/safekeeper/src/pull_timeline.rs index 00777273cb..f2d8e4c85f 100644 --- a/safekeeper/src/pull_timeline.rs +++ b/safekeeper/src/pull_timeline.rs @@ -5,6 +5,8 @@ use chrono::{DateTime, Utc}; use futures::{SinkExt, StreamExt, TryStreamExt}; use postgres_ffi::{XLogFileName, XLogSegNo, PG_TLI}; use safekeeper_api::{models::TimelineStatus, Term}; +use safekeeper_client::mgmt_api; +use safekeeper_client::mgmt_api::Client; use serde::{Deserialize, Serialize}; use std::{ cmp::min, @@ -22,7 +24,6 @@ use tracing::{error, info, instrument}; use crate::{ control_file::CONTROL_FILE_NAME, debug_dump, - http::client::{self, Client}, state::{EvictionState, TimelinePersistentState}, timeline::{Timeline, WalResidentTimeline}, timelines_global_map::{create_temp_timeline_dir, validate_temp_timeline}, @@ -419,7 +420,7 @@ pub async fn handle_request( let http_hosts = request.http_hosts.clone(); // Figure out statuses of potential donors. - let responses: Vec> = + let responses: Vec> = futures::future::join_all(http_hosts.iter().map(|url| async { let cclient = Client::new(url.clone(), sk_auth_token.clone()); let info = cclient From c5e3314c6e3a3d693310626a4c9c8deec34931c3 Mon Sep 17 00:00:00 2001 From: Arseny Sher Date: Mon, 16 Dec 2024 17:53:04 +0300 Subject: [PATCH 17/89] Add test restarting compute at WAL page boundary (#10111) ## Problem We've had similar test in test_logical_replication, but then removed it because it wasn't needed to trigger LR related bug. Restarting at WAL page boundary is still a useful test, so add it separately back. ## Summary of changes Add the test. --- test_runner/regress/test_wal_acceptor.py | 56 ++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index 23d4f23cdb..0a8900b351 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -1090,6 +1090,62 @@ def test_restart_endpoint_after_switch_wal(neon_env_builder: NeonEnvBuilder): endpoint.safe_psql("SELECT 'works'") +# Test restarting compute at WAL page boundary. +def test_restart_endpoint_wal_page_boundary(neon_env_builder: NeonEnvBuilder): + env = neon_env_builder.init_start() + + ep = env.endpoints.create_start("main") + ep.safe_psql("create table t (i int)") + + with ep.cursor() as cur: + # measure how much space logical message takes. Sometimes first attempt + # creates huge message and then it stabilizes, have no idea why. + for _ in range(3): + lsn_before = Lsn(query_scalar(cur, "select pg_current_wal_lsn()")) + log.info(f"current_lsn={lsn_before}") + # Non-transactional logical message doesn't write WAL, only XLogInsert's + # it, so use transactional. Which is a bit problematic as transactional + # necessitates commit record. 
Alternatively we can do smth like + # select neon_xlogflush(pg_current_wal_insert_lsn()); + # but isn't much better + that particular call complains on 'xlog flush + # request 0/282C018 is not satisfied' as pg_current_wal_insert_lsn skips + # page headers. + payload = "blahblah" + cur.execute(f"select pg_logical_emit_message(true, 'pref', '{payload}')") + lsn_after_by_curr_wal_lsn = Lsn(query_scalar(cur, "select pg_current_wal_lsn()")) + lsn_diff = lsn_after_by_curr_wal_lsn - lsn_before + logical_message_base = lsn_after_by_curr_wal_lsn - lsn_before - len(payload) + log.info( + f"before {lsn_before}, after {lsn_after_by_curr_wal_lsn}, lsn diff is {lsn_diff}, base {logical_message_base}" + ) + + # and write logical message spanning exactly as we want + lsn_before = Lsn(query_scalar(cur, "select pg_current_wal_lsn()")) + log.info(f"current_lsn={lsn_before}") + curr_lsn = Lsn(query_scalar(cur, "select pg_current_wal_lsn()")) + offs = int(curr_lsn) % 8192 + till_page = 8192 - offs + target_lsn = curr_lsn + till_page + payload_len = ( + till_page - logical_message_base - 8 + ) # not sure why 8 is here, it is deduced from experiments + log.info( + f"current_lsn={curr_lsn}, offs {offs}, till_page {till_page}, target_lsn {target_lsn}" + ) + + cur.execute(f"select pg_logical_emit_message(true, 'pref', 'f{'a' * payload_len}')") + supposedly_contrecord_end = Lsn(query_scalar(cur, "select pg_current_wal_lsn()")) + log.info(f"supposedly_page_boundary={supposedly_contrecord_end}") + # The calculations to hit the page boundary are very fuzzy, so just + # ignore test if we fail to reach it. + if not (int(supposedly_contrecord_end) % 8192 == 0): + pytest.skip(f"missed page boundary, bad luck: lsn is {supposedly_contrecord_end}") + + ep.stop(mode="immediate") + ep = env.endpoints.create_start("main") + ep.safe_psql("insert into t values (42)") # should be ok + + # Context manager which logs passed time on exit. 
class DurationLogger: def __init__(self, desc): From 6565fd4056e0c040f26cc593a03561fe775595ff Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Mon, 16 Dec 2024 15:33:21 +0000 Subject: [PATCH 18/89] chore: fix clippy lints 2024-12-06 (#10138) --- libs/desim/src/time.rs | 2 +- .../wal_craft/src/xlog_utils_test.rs | 2 +- .../proxy/tokio-postgres2/src/to_statement.rs | 2 +- libs/remote_storage/src/azure_blob.rs | 4 +-- libs/remote_storage/src/lib.rs | 4 +-- libs/remote_storage/src/local_fs.rs | 4 +-- libs/remote_storage/src/s3_bucket.rs | 4 +-- libs/remote_storage/src/simulate_failures.rs | 4 +-- pageserver/compaction/src/compact_tiered.rs | 2 +- pageserver/compaction/src/identify_levels.rs | 5 ++-- pageserver/compaction/src/interface.rs | 2 +- pageserver/compaction/src/simulator.rs | 2 +- pageserver/src/basebackup.rs | 2 +- pageserver/src/pgdatadir_mapping.rs | 8 +++--- pageserver/src/tenant/blob_io.rs | 2 +- pageserver/src/tenant/block_io.rs | 2 +- pageserver/src/tenant/disk_btree.rs | 2 +- pageserver/src/tenant/ephemeral_file.rs | 6 ++--- pageserver/src/tenant/layer_map.rs | 4 +-- .../tenant/remote_timeline_client/download.rs | 4 +-- .../tenant/remote_timeline_client/upload.rs | 4 +-- pageserver/src/tenant/storage_layer.rs | 5 +--- .../src/tenant/storage_layer/delta_layer.rs | 4 +-- .../src/tenant/storage_layer/image_layer.rs | 2 +- .../inmemory_layer/vectored_dio_read.rs | 26 +++++++++---------- pageserver/src/tenant/timeline.rs | 2 +- pageserver/src/tenant/timeline/compaction.rs | 4 +-- safekeeper/src/receive_wal.rs | 2 +- safekeeper/src/safekeeper.rs | 5 +--- storage_controller/src/service.rs | 5 +--- 30 files changed, 58 insertions(+), 68 deletions(-) diff --git a/libs/desim/src/time.rs b/libs/desim/src/time.rs index 7bb71db95c..7ce605bda8 100644 --- a/libs/desim/src/time.rs +++ b/libs/desim/src/time.rs @@ -91,7 +91,7 @@ impl Timing { /// Return true if there is a ready event. fn is_event_ready(&self, queue: &mut BinaryHeap) -> bool { - queue.peek().map_or(false, |x| x.time <= self.now()) + queue.peek().is_some_and(|x| x.time <= self.now()) } /// Clear all pending events. 
diff --git a/libs/postgres_ffi/wal_craft/src/xlog_utils_test.rs b/libs/postgres_ffi/wal_craft/src/xlog_utils_test.rs index 9eb3f0e95a..4a33dbe25b 100644 --- a/libs/postgres_ffi/wal_craft/src/xlog_utils_test.rs +++ b/libs/postgres_ffi/wal_craft/src/xlog_utils_test.rs @@ -81,7 +81,7 @@ fn test_end_of_wal(test_name: &str) { continue; } let mut f = File::options().write(true).open(file.path()).unwrap(); - const ZEROS: [u8; WAL_SEGMENT_SIZE] = [0u8; WAL_SEGMENT_SIZE]; + static ZEROS: [u8; WAL_SEGMENT_SIZE] = [0u8; WAL_SEGMENT_SIZE]; f.write_all( &ZEROS[0..min( WAL_SEGMENT_SIZE, diff --git a/libs/proxy/tokio-postgres2/src/to_statement.rs b/libs/proxy/tokio-postgres2/src/to_statement.rs index 427f77dd79..7e12992728 100644 --- a/libs/proxy/tokio-postgres2/src/to_statement.rs +++ b/libs/proxy/tokio-postgres2/src/to_statement.rs @@ -11,7 +11,7 @@ mod private { Query(&'a str), } - impl<'a> ToStatementType<'a> { + impl ToStatementType<'_> { pub async fn into_statement(self, client: &Client) -> Result { match self { ToStatementType::Statement(s) => Ok(s.clone()), diff --git a/libs/remote_storage/src/azure_blob.rs b/libs/remote_storage/src/azure_blob.rs index 32c51bc2ad..19c8251ccd 100644 --- a/libs/remote_storage/src/azure_blob.rs +++ b/libs/remote_storage/src/azure_blob.rs @@ -544,9 +544,9 @@ impl RemoteStorage for AzureBlobStorage { .await } - async fn delete_objects<'a>( + async fn delete_objects( &self, - paths: &'a [RemotePath], + paths: &[RemotePath], cancel: &CancellationToken, ) -> anyhow::Result<()> { let kind = RequestKind::Delete; diff --git a/libs/remote_storage/src/lib.rs b/libs/remote_storage/src/lib.rs index 2a3468f986..7a864151ec 100644 --- a/libs/remote_storage/src/lib.rs +++ b/libs/remote_storage/src/lib.rs @@ -341,9 +341,9 @@ pub trait RemoteStorage: Send + Sync + 'static { /// If the operation fails because of timeout or cancellation, the root cause of the error will be /// set to `TimeoutOrCancel`. In such situation it is unknown which deletions, if any, went /// through. 
- async fn delete_objects<'a>( + async fn delete_objects( &self, - paths: &'a [RemotePath], + paths: &[RemotePath], cancel: &CancellationToken, ) -> anyhow::Result<()>; diff --git a/libs/remote_storage/src/local_fs.rs b/libs/remote_storage/src/local_fs.rs index 1a2d421c66..a8b00173ba 100644 --- a/libs/remote_storage/src/local_fs.rs +++ b/libs/remote_storage/src/local_fs.rs @@ -562,9 +562,9 @@ impl RemoteStorage for LocalFs { } } - async fn delete_objects<'a>( + async fn delete_objects( &self, - paths: &'a [RemotePath], + paths: &[RemotePath], cancel: &CancellationToken, ) -> anyhow::Result<()> { for path in paths { diff --git a/libs/remote_storage/src/s3_bucket.rs b/libs/remote_storage/src/s3_bucket.rs index 2891f92d07..d3f19f0b11 100644 --- a/libs/remote_storage/src/s3_bucket.rs +++ b/libs/remote_storage/src/s3_bucket.rs @@ -813,9 +813,9 @@ impl RemoteStorage for S3Bucket { .await } - async fn delete_objects<'a>( + async fn delete_objects( &self, - paths: &'a [RemotePath], + paths: &[RemotePath], cancel: &CancellationToken, ) -> anyhow::Result<()> { let kind = RequestKind::Delete; diff --git a/libs/remote_storage/src/simulate_failures.rs b/libs/remote_storage/src/simulate_failures.rs index 51833c1fe6..63c24beb51 100644 --- a/libs/remote_storage/src/simulate_failures.rs +++ b/libs/remote_storage/src/simulate_failures.rs @@ -181,9 +181,9 @@ impl RemoteStorage for UnreliableWrapper { self.delete_inner(path, true, cancel).await } - async fn delete_objects<'a>( + async fn delete_objects( &self, - paths: &'a [RemotePath], + paths: &[RemotePath], cancel: &CancellationToken, ) -> anyhow::Result<()> { self.attempt(RemoteOp::DeleteObjects(paths.to_vec()))?; diff --git a/pageserver/compaction/src/compact_tiered.rs b/pageserver/compaction/src/compact_tiered.rs index 20f88868f9..7779ffaf8b 100644 --- a/pageserver/compaction/src/compact_tiered.rs +++ b/pageserver/compaction/src/compact_tiered.rs @@ -272,7 +272,7 @@ struct CompactionJob { completed: bool, } -impl<'a, E> LevelCompactionState<'a, E> +impl LevelCompactionState<'_, E> where E: CompactionJobExecutor, { diff --git a/pageserver/compaction/src/identify_levels.rs b/pageserver/compaction/src/identify_levels.rs index 1853afffdd..e04bd15396 100644 --- a/pageserver/compaction/src/identify_levels.rs +++ b/pageserver/compaction/src/identify_levels.rs @@ -224,9 +224,8 @@ impl Level { } // recalculate depth if this was the last event at this point - let more_events_at_this_key = events_iter - .peek() - .map_or(false, |next_e| next_e.key == e.key); + let more_events_at_this_key = + events_iter.peek().is_some_and(|next_e| next_e.key == e.key); if !more_events_at_this_key { let mut active_depth = 0; for (_end_lsn, is_image, _idx) in active_set.iter().rev() { diff --git a/pageserver/compaction/src/interface.rs b/pageserver/compaction/src/interface.rs index 5bc9b5ca1d..8ed393a645 100644 --- a/pageserver/compaction/src/interface.rs +++ b/pageserver/compaction/src/interface.rs @@ -148,7 +148,7 @@ pub trait CompactionDeltaLayer: CompactionLay Self: 'a; /// Return all keys in this delta layer. 
- fn load_keys<'a>( + fn load_keys( &self, ctx: &E::RequestContext, ) -> impl Future>>> + Send; diff --git a/pageserver/compaction/src/simulator.rs b/pageserver/compaction/src/simulator.rs index 776c537d03..673b80c313 100644 --- a/pageserver/compaction/src/simulator.rs +++ b/pageserver/compaction/src/simulator.rs @@ -143,7 +143,7 @@ impl interface::CompactionLayer for Arc { impl interface::CompactionDeltaLayer for Arc { type DeltaEntry<'a> = MockRecord; - async fn load_keys<'a>(&self, _ctx: &MockRequestContext) -> anyhow::Result> { + async fn load_keys(&self, _ctx: &MockRequestContext) -> anyhow::Result> { Ok(self.records.clone()) } } diff --git a/pageserver/src/basebackup.rs b/pageserver/src/basebackup.rs index cae0ffb980..e1b5676f46 100644 --- a/pageserver/src/basebackup.rs +++ b/pageserver/src/basebackup.rs @@ -248,7 +248,7 @@ where } } -impl<'a, W> Basebackup<'a, W> +impl Basebackup<'_, W> where W: AsyncWrite + Send + Sync + Unpin, { diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 3eaecd3a08..14c7e0d2f8 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -1242,7 +1242,7 @@ pub struct DatadirModification<'a> { pending_metadata_bytes: usize, } -impl<'a> DatadirModification<'a> { +impl DatadirModification<'_> { // When a DatadirModification is committed, we do a monolithic serialization of all its contents. WAL records can // contain multiple pages, so the pageserver's record-based batch size isn't sufficient to bound this allocation: we // additionally specify a limit on how much payload a DatadirModification may contain before it should be committed. @@ -1263,7 +1263,7 @@ impl<'a> DatadirModification<'a> { pub(crate) fn has_dirty_data(&self) -> bool { self.pending_data_batch .as_ref() - .map_or(false, |b| b.has_data()) + .is_some_and(|b| b.has_data()) } /// Set the current lsn @@ -2230,7 +2230,7 @@ impl<'a> DatadirModification<'a> { assert!(!self .pending_data_batch .as_ref() - .map_or(false, |b| b.updates_key(&key))); + .is_some_and(|b| b.updates_key(&key))); } } @@ -2299,7 +2299,7 @@ pub enum Version<'a> { Modified(&'a DatadirModification<'a>), } -impl<'a> Version<'a> { +impl Version<'_> { async fn get( &self, timeline: &Timeline, diff --git a/pageserver/src/tenant/blob_io.rs b/pageserver/src/tenant/blob_io.rs index dd70f6bbff..7b55df52a5 100644 --- a/pageserver/src/tenant/blob_io.rs +++ b/pageserver/src/tenant/blob_io.rs @@ -35,7 +35,7 @@ pub struct CompressionInfo { pub compressed_size: Option, } -impl<'a> BlockCursor<'a> { +impl BlockCursor<'_> { /// Read a blob into a new buffer. 
pub async fn read_blob( &self, diff --git a/pageserver/src/tenant/block_io.rs b/pageserver/src/tenant/block_io.rs index 2bd7f2d619..990211f80a 100644 --- a/pageserver/src/tenant/block_io.rs +++ b/pageserver/src/tenant/block_io.rs @@ -89,7 +89,7 @@ pub(crate) enum BlockReaderRef<'a> { VirtualFile(&'a VirtualFile), } -impl<'a> BlockReaderRef<'a> { +impl BlockReaderRef<'_> { #[inline(always)] async fn read_blk( &self, diff --git a/pageserver/src/tenant/disk_btree.rs b/pageserver/src/tenant/disk_btree.rs index b302cbc975..c77342b144 100644 --- a/pageserver/src/tenant/disk_btree.rs +++ b/pageserver/src/tenant/disk_btree.rs @@ -532,7 +532,7 @@ pub struct DiskBtreeIterator<'a> { >, } -impl<'a> DiskBtreeIterator<'a> { +impl DiskBtreeIterator<'_> { pub async fn next(&mut self) -> Option, u64), DiskBtreeError>> { self.stream.next().await } diff --git a/pageserver/src/tenant/ephemeral_file.rs b/pageserver/src/tenant/ephemeral_file.rs index aaec8a4c31..ba79672bc7 100644 --- a/pageserver/src/tenant/ephemeral_file.rs +++ b/pageserver/src/tenant/ephemeral_file.rs @@ -174,11 +174,11 @@ impl EphemeralFile { } impl super::storage_layer::inmemory_layer::vectored_dio_read::File for EphemeralFile { - async fn read_exact_at_eof_ok<'a, 'b, B: IoBufAlignedMut + Send>( - &'b self, + async fn read_exact_at_eof_ok( + &self, start: u64, dst: tokio_epoll_uring::Slice, - ctx: &'a RequestContext, + ctx: &RequestContext, ) -> std::io::Result<(tokio_epoll_uring::Slice, usize)> { let submitted_offset = self.buffered_writer.bytes_submitted(); diff --git a/pageserver/src/tenant/layer_map.rs b/pageserver/src/tenant/layer_map.rs index 7f15baed10..1b6924425c 100644 --- a/pageserver/src/tenant/layer_map.rs +++ b/pageserver/src/tenant/layer_map.rs @@ -392,8 +392,8 @@ impl LayerMap { image_layer: Option>, end_lsn: Lsn, ) -> Option { - assert!(delta_layer.as_ref().map_or(true, |l| l.is_delta())); - assert!(image_layer.as_ref().map_or(true, |l| !l.is_delta())); + assert!(delta_layer.as_ref().is_none_or(|l| l.is_delta())); + assert!(image_layer.as_ref().is_none_or(|l| !l.is_delta())); match (delta_layer, image_layer) { (None, None) => None, diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs index d15f161fb6..b4d45dca75 100644 --- a/pageserver/src/tenant/remote_timeline_client/download.rs +++ b/pageserver/src/tenant/remote_timeline_client/download.rs @@ -145,8 +145,8 @@ pub async fn download_layer_file<'a>( /// /// If Err() is returned, there was some error. The file at `dst_path` has been unlinked. /// The unlinking has _not_ been made durable. -async fn download_object<'a>( - storage: &'a GenericRemoteStorage, +async fn download_object( + storage: &GenericRemoteStorage, src_path: &RemotePath, dst_path: &Utf8PathBuf, #[cfg_attr(target_os = "macos", allow(unused_variables))] gate: &utils::sync::gate::Gate, diff --git a/pageserver/src/tenant/remote_timeline_client/upload.rs b/pageserver/src/tenant/remote_timeline_client/upload.rs index 0cd5d05aa2..e434d24e5f 100644 --- a/pageserver/src/tenant/remote_timeline_client/upload.rs +++ b/pageserver/src/tenant/remote_timeline_client/upload.rs @@ -25,8 +25,8 @@ use utils::id::{TenantId, TimelineId}; use tracing::info; /// Serializes and uploads the given index part data to the remote storage. 
-pub(crate) async fn upload_index_part<'a>( - storage: &'a GenericRemoteStorage, +pub(crate) async fn upload_index_part( + storage: &GenericRemoteStorage, tenant_shard_id: &TenantShardId, timeline_id: &TimelineId, generation: Generation, diff --git a/pageserver/src/tenant/storage_layer.rs b/pageserver/src/tenant/storage_layer.rs index 9e3a25cbbc..b8206fca5a 100644 --- a/pageserver/src/tenant/storage_layer.rs +++ b/pageserver/src/tenant/storage_layer.rs @@ -345,10 +345,7 @@ impl LayerFringe { } pub(crate) fn next_layer(&mut self) -> Option<(ReadableLayer, KeySpace, Range)> { - let read_desc = match self.planned_visits_by_lsn.pop() { - Some(desc) => desc, - None => return None, - }; + let read_desc = self.planned_visits_by_lsn.pop()?; let removed = self.visit_reads.remove_entry(&read_desc.layer_to_visit_id); diff --git a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs index fec8a0a16c..ade1b794c6 100644 --- a/pageserver/src/tenant/storage_layer/delta_layer.rs +++ b/pageserver/src/tenant/storage_layer/delta_layer.rs @@ -1486,7 +1486,7 @@ pub struct ValueRef<'a> { layer: &'a DeltaLayerInner, } -impl<'a> ValueRef<'a> { +impl ValueRef<'_> { /// Loads the value from disk pub async fn load(&self, ctx: &RequestContext) -> Result { let buf = self.load_raw(ctx).await?; @@ -1543,7 +1543,7 @@ pub struct DeltaLayerIterator<'a> { is_end: bool, } -impl<'a> DeltaLayerIterator<'a> { +impl DeltaLayerIterator<'_> { pub(crate) fn layer_dbg_info(&self) -> String { self.delta_layer.layer_dbg_info() } diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs index 834d1931d0..0d3c9d5a44 100644 --- a/pageserver/src/tenant/storage_layer/image_layer.rs +++ b/pageserver/src/tenant/storage_layer/image_layer.rs @@ -1052,7 +1052,7 @@ pub struct ImageLayerIterator<'a> { is_end: bool, } -impl<'a> ImageLayerIterator<'a> { +impl ImageLayerIterator<'_> { pub(crate) fn layer_dbg_info(&self) -> String { self.image_layer.layer_dbg_info() } diff --git a/pageserver/src/tenant/storage_layer/inmemory_layer/vectored_dio_read.rs b/pageserver/src/tenant/storage_layer/inmemory_layer/vectored_dio_read.rs index a4bb3a6bfc..1d86015fab 100644 --- a/pageserver/src/tenant/storage_layer/inmemory_layer/vectored_dio_read.rs +++ b/pageserver/src/tenant/storage_layer/inmemory_layer/vectored_dio_read.rs @@ -25,11 +25,11 @@ pub trait File: Send { /// [`std::io::ErrorKind::UnexpectedEof`] error if the file is shorter than `start+dst.len()`. /// /// No guarantees are made about the remaining bytes in `dst` in case of a short read. 
- async fn read_exact_at_eof_ok<'a, 'b, B: IoBufAlignedMut + Send>( - &'b self, + async fn read_exact_at_eof_ok( + &self, start: u64, dst: Slice, - ctx: &'a RequestContext, + ctx: &RequestContext, ) -> std::io::Result<(Slice, usize)>; } @@ -479,11 +479,11 @@ mod tests { } impl File for InMemoryFile { - async fn read_exact_at_eof_ok<'a, 'b, B: IoBufMut + Send>( - &'b self, + async fn read_exact_at_eof_ok( + &self, start: u64, mut dst: Slice, - _ctx: &'a RequestContext, + _ctx: &RequestContext, ) -> std::io::Result<(Slice, usize)> { let dst_slice: &mut [u8] = dst.as_mut_rust_slice_full_zeroed(); let nread = { @@ -609,12 +609,12 @@ mod tests { } } - impl<'x> File for RecorderFile<'x> { - async fn read_exact_at_eof_ok<'a, 'b, B: IoBufAlignedMut + Send>( - &'b self, + impl File for RecorderFile<'_> { + async fn read_exact_at_eof_ok( + &self, start: u64, dst: Slice, - ctx: &'a RequestContext, + ctx: &RequestContext, ) -> std::io::Result<(Slice, usize)> { let (dst, nread) = self.file.read_exact_at_eof_ok(start, dst, ctx).await?; self.recorded.borrow_mut().push(RecordedRead { @@ -740,11 +740,11 @@ mod tests { } impl File for MockFile { - async fn read_exact_at_eof_ok<'a, 'b, B: IoBufMut + Send>( - &'b self, + async fn read_exact_at_eof_ok( + &self, start: u64, mut dst: Slice, - _ctx: &'a RequestContext, + _ctx: &RequestContext, ) -> std::io::Result<(Slice, usize)> { let ExpectedRead { expect_pos, diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 0416953c1f..87f5a03382 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -5842,7 +5842,7 @@ enum OpenLayerAction { None, } -impl<'a> TimelineWriter<'a> { +impl TimelineWriter<'_> { async fn handle_open_layer_action( &mut self, at: Lsn, diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 5e6290729c..8b6cc8ed84 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -1110,7 +1110,7 @@ impl Timeline { return Err(CompactionError::ShuttingDown); } - let same_key = prev_key.map_or(false, |prev_key| prev_key == key); + let same_key = prev_key == Some(key); // We need to check key boundaries once we reach next key or end of layer with the same key if !same_key || lsn == dup_end_lsn { let mut next_key_size = 0u64; @@ -2904,7 +2904,7 @@ impl CompactionLayer for ResidentDeltaLayer { impl CompactionDeltaLayer for ResidentDeltaLayer { type DeltaEntry<'a> = DeltaEntry<'a>; - async fn load_keys<'a>(&self, ctx: &RequestContext) -> anyhow::Result>> { + async fn load_keys(&self, ctx: &RequestContext) -> anyhow::Result>> { self.0.get_as_delta(ctx).await?.index_entries(ctx).await } } diff --git a/safekeeper/src/receive_wal.rs b/safekeeper/src/receive_wal.rs index 08371177cd..3e9ce1da8e 100644 --- a/safekeeper/src/receive_wal.rs +++ b/safekeeper/src/receive_wal.rs @@ -318,7 +318,7 @@ struct NetworkReader<'a, IO> { global_timelines: Arc, } -impl<'a, IO: AsyncRead + AsyncWrite + Unpin> NetworkReader<'a, IO> { +impl NetworkReader<'_, IO> { async fn read_first_message( &mut self, ) -> Result<(WalResidentTimeline, ProposerAcceptorMessage), CopyStreamHandlerEnd> { diff --git a/safekeeper/src/safekeeper.rs b/safekeeper/src/safekeeper.rs index ccd7940c72..6ceaf325b0 100644 --- a/safekeeper/src/safekeeper.rs +++ b/safekeeper/src/safekeeper.rs @@ -125,10 +125,7 @@ impl TermHistory { ); last_common_idx = Some(i); } - let last_common_idx = match last_common_idx { - None => return None, // no common 
point - Some(lci) => lci, - }; + let last_common_idx = last_common_idx?; // Now find where it ends at both prop and sk and take min. End of // (common) term is the start of the next except it is the last one; // there it is flush_lsn in case of safekeeper or, in case of proposer diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 746177c089..a89e4741f6 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -6873,10 +6873,7 @@ impl Service { let mut plan = Vec::new(); for (node_id, attached) in nodes_by_load { - let available = locked - .nodes - .get(&node_id) - .map_or(false, |n| n.is_available()); + let available = locked.nodes.get(&node_id).is_some_and(|n| n.is_available()); if !available { continue; } From 3d30a7a9348efbb0d1faca6d44dccc1885c5b8c9 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Mon, 16 Dec 2024 16:54:47 +0100 Subject: [PATCH 19/89] pageserver: make `RemoteTimelineClient::schedule_index_upload` infallible (#10155) Remove an unnecessary `Result` and address a `FIXME`. --- .../src/tenant/remote_timeline_client.rs | 39 ++++++++----------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 20e0536a00..fee11bc742 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -749,7 +749,7 @@ impl RemoteTimelineClient { // ahead of what's _actually_ on the remote during index upload. upload_queue.dirty.metadata = metadata.clone(); - self.schedule_index_upload(upload_queue)?; + self.schedule_index_upload(upload_queue); Ok(()) } @@ -770,7 +770,7 @@ impl RemoteTimelineClient { upload_queue.dirty.metadata.apply(update); - self.schedule_index_upload(upload_queue)?; + self.schedule_index_upload(upload_queue); Ok(()) } @@ -809,7 +809,7 @@ impl RemoteTimelineClient { if let Some(archived_at_set) = need_upload_scheduled { let intended_archived_at = archived_at_set.then(|| Utc::now().naive_utc()); upload_queue.dirty.archived_at = intended_archived_at; - self.schedule_index_upload(upload_queue)?; + self.schedule_index_upload(upload_queue); } let need_wait = need_change(&upload_queue.clean.0.archived_at, state).is_some(); @@ -824,7 +824,7 @@ impl RemoteTimelineClient { let mut guard = self.upload_queue.lock().unwrap(); let upload_queue = guard.initialized_mut()?; upload_queue.dirty.import_pgdata = state; - self.schedule_index_upload(upload_queue)?; + self.schedule_index_upload(upload_queue); Ok(()) } @@ -843,17 +843,14 @@ impl RemoteTimelineClient { let upload_queue = guard.initialized_mut()?; if upload_queue.latest_files_changes_since_metadata_upload_scheduled > 0 { - self.schedule_index_upload(upload_queue)?; + self.schedule_index_upload(upload_queue); } Ok(()) } /// Launch an index-file upload operation in the background (internal function) - fn schedule_index_upload( - self: &Arc, - upload_queue: &mut UploadQueueInitialized, - ) -> Result<(), NotInitialized> { + fn schedule_index_upload(self: &Arc, upload_queue: &mut UploadQueueInitialized) { let disk_consistent_lsn = upload_queue.dirty.metadata.disk_consistent_lsn(); // fix up the duplicated field upload_queue.dirty.disk_consistent_lsn = disk_consistent_lsn; @@ -880,7 +877,6 @@ impl RemoteTimelineClient { // Launch the task immediately, if possible self.launch_queued_tasks(upload_queue); - Ok(()) } /// Reparent this timeline to a new parent. 
@@ -909,7 +905,7 @@ impl RemoteTimelineClient { upload_queue.dirty.metadata.reparent(new_parent); upload_queue.dirty.lineage.record_previous_ancestor(&prev); - self.schedule_index_upload(upload_queue)?; + self.schedule_index_upload(upload_queue); Some(self.schedule_barrier0(upload_queue)) } @@ -948,7 +944,7 @@ impl RemoteTimelineClient { assert!(prev.is_none(), "copied layer existed already {layer}"); } - self.schedule_index_upload(upload_queue)?; + self.schedule_index_upload(upload_queue); Some(self.schedule_barrier0(upload_queue)) } @@ -1004,7 +1000,7 @@ impl RemoteTimelineClient { upload_queue.dirty.gc_blocking = current .map(|x| x.with_reason(reason)) .or_else(|| Some(index::GcBlocking::started_now_for(reason))); - self.schedule_index_upload(upload_queue)?; + self.schedule_index_upload(upload_queue); Some(self.schedule_barrier0(upload_queue)) } } @@ -1057,8 +1053,7 @@ impl RemoteTimelineClient { upload_queue.dirty.gc_blocking = current.as_ref().and_then(|x| x.without_reason(reason)); assert!(wanted(upload_queue.dirty.gc_blocking.as_ref())); - // FIXME: bogus ? - self.schedule_index_upload(upload_queue)?; + self.schedule_index_upload(upload_queue); Some(self.schedule_barrier0(upload_queue)) } } @@ -1125,8 +1120,8 @@ impl RemoteTimelineClient { let mut guard = self.upload_queue.lock().unwrap(); let upload_queue = guard.initialized_mut()?; - let with_metadata = self - .schedule_unlinking_of_layers_from_index_part0(upload_queue, names.iter().cloned())?; + let with_metadata = + self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names.iter().cloned()); self.schedule_deletion_of_unlinked0(upload_queue, with_metadata); @@ -1153,7 +1148,7 @@ impl RemoteTimelineClient { let names = gc_layers.iter().map(|x| x.layer_desc().layer_name()); - self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names)?; + self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names); self.launch_queued_tasks(upload_queue); @@ -1166,7 +1161,7 @@ impl RemoteTimelineClient { self: &Arc, upload_queue: &mut UploadQueueInitialized, names: I, - ) -> Result, NotInitialized> + ) -> Vec<(LayerName, LayerFileMetadata)> where I: IntoIterator, { @@ -1208,10 +1203,10 @@ impl RemoteTimelineClient { // index_part update, because that needs to be uploaded before we can actually delete the // files. if upload_queue.latest_files_changes_since_metadata_upload_scheduled > 0 { - self.schedule_index_upload(upload_queue)?; + self.schedule_index_upload(upload_queue); } - Ok(with_metadata) + with_metadata } /// Schedules deletion for layer files which have previously been unlinked from the @@ -1302,7 +1297,7 @@ impl RemoteTimelineClient { let names = compacted_from.iter().map(|x| x.layer_desc().layer_name()); - self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names)?; + self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names); self.launch_queued_tasks(upload_queue); Ok(()) From 2e4c9c570491b6747e65bdeba64dc55f0dec2ea3 Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Mon, 16 Dec 2024 16:11:39 +0000 Subject: [PATCH 20/89] chore(proxy): remove allow_self_signed from regular proxy (#10157) I noticed that the only place we use this flag is for testing console redirect proxy. Makes sense to me to make this assumption more explicit. 
--- proxy/src/proxy/mod.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/proxy/src/proxy/mod.rs b/proxy/src/proxy/mod.rs index de0ec0f799..5db92d748a 100644 --- a/proxy/src/proxy/mod.rs +++ b/proxy/src/proxy/mod.rs @@ -191,13 +191,6 @@ impl ClientMode { } } - pub(crate) fn allow_self_signed_compute(&self, config: &ProxyConfig) -> bool { - match self { - ClientMode::Tcp => config.allow_self_signed_compute, - ClientMode::Websockets { .. } => false, - } - } - fn hostname<'a, S>(&'a self, s: &'a Stream) -> Option<&'a str> { match self { ClientMode::Tcp => s.sni_hostname(), @@ -355,7 +348,8 @@ pub(crate) async fn handle_client( params_compat, params: ¶ms, locks: &config.connect_compute_locks, - allow_self_signed_compute: mode.allow_self_signed_compute(config), + // only used for console redirect testing. + allow_self_signed_compute: false, }, &user_info, config.wake_compute_retry_config, From 59b7ff89881ac218f35f545ce38f556d08af36dd Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Mon, 16 Dec 2024 16:37:15 +0000 Subject: [PATCH 21/89] chore(proxy): disallow unwrap and unimplemented (#10142) As the title says, I updated the lint rules to no longer allow unwrap or unimplemented. Three special cases: * Tests are allowed to use them * std::sync::Mutex lock().unwrap() is common because it's usually correct to continue panicking on poison * `tokio::spawn_blocking(...).await.unwrap()` is common because it will only error if the blocking fn panics, so continuing the panic is also correct I've introduced two extension traits to help with these last two, that are a bit more explicit so they don't need an expect message every time. --- proxy/src/auth/backend/jwt.rs | 1 + proxy/src/auth/backend/mod.rs | 2 + proxy/src/auth/credentials.rs | 1 + proxy/src/cache/endpoints.rs | 4 +- proxy/src/cache/project_info.rs | 1 + proxy/src/cancellation.rs | 4 +- proxy/src/config.rs | 13 ++---- proxy/src/context/parquet.rs | 14 ++++-- proxy/src/control_plane/client/mock.rs | 4 +- proxy/src/ext.rs | 41 +++++++++++++++++ proxy/src/http/health_server.rs | 7 +-- proxy/src/intern.rs | 3 +- proxy/src/lib.rs | 5 ++- proxy/src/logging.rs | 12 ++++- proxy/src/parse.rs | 15 ------- proxy/src/protocol2.rs | 1 + proxy/src/proxy/copy_bidirectional.rs | 1 + proxy/src/proxy/mod.rs | 2 +- proxy/src/proxy/tests/mod.rs | 1 + proxy/src/rate_limiter/leaky_bucket.rs | 2 +- .../src/rate_limiter/limit_algorithm/aimd.rs | 1 + proxy/src/rate_limiter/limiter.rs | 4 +- .../connection_with_credentials_provider.rs | 6 ++- proxy/src/sasl/messages.rs | 5 ++- proxy/src/scram/messages.rs | 1 + proxy/src/scram/mod.rs | 1 + proxy/src/scram/secret.rs | 1 + proxy/src/scram/threadpool.rs | 16 +++---- proxy/src/serverless/backend.rs | 6 ++- proxy/src/serverless/conn_pool.rs | 34 ++++++-------- proxy/src/serverless/conn_pool_lib.rs | 45 +++++++++---------- proxy/src/serverless/http_util.rs | 11 +++-- proxy/src/serverless/json.rs | 6 ++- proxy/src/serverless/local_conn_pool.rs | 11 ++--- proxy/src/serverless/mod.rs | 5 ++- proxy/src/serverless/sql_over_http.rs | 1 + proxy/src/serverless/websocket.rs | 1 + proxy/src/url.rs | 1 + proxy/src/usage_metrics.rs | 1 + 39 files changed, 178 insertions(+), 113 deletions(-) create mode 100644 proxy/src/ext.rs diff --git a/proxy/src/auth/backend/jwt.rs b/proxy/src/auth/backend/jwt.rs index a258090b15..df716f8455 100644 --- a/proxy/src/auth/backend/jwt.rs +++ b/proxy/src/auth/backend/jwt.rs @@ -776,6 +776,7 @@ impl From<&jose_jwk::Key> for KeyType { } #[cfg(test)] 
+#[expect(clippy::unwrap_used)] mod tests { use std::future::IntoFuture; use std::net::SocketAddr; diff --git a/proxy/src/auth/backend/mod.rs b/proxy/src/auth/backend/mod.rs index f38ecf715f..50cb94bfa0 100644 --- a/proxy/src/auth/backend/mod.rs +++ b/proxy/src/auth/backend/mod.rs @@ -463,6 +463,8 @@ impl ComputeConnectBackend for Backend<'_, ComputeCredentials> { #[cfg(test)] mod tests { + #![allow(clippy::unimplemented, clippy::unwrap_used)] + use std::net::IpAddr; use std::sync::Arc; use std::time::Duration; diff --git a/proxy/src/auth/credentials.rs b/proxy/src/auth/credentials.rs index f6bce9f2d8..eff49a402a 100644 --- a/proxy/src/auth/credentials.rs +++ b/proxy/src/auth/credentials.rs @@ -250,6 +250,7 @@ fn project_name_valid(name: &str) -> bool { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use serde_json::json; use ComputeUserInfoParseError::*; diff --git a/proxy/src/cache/endpoints.rs b/proxy/src/cache/endpoints.rs index 20db1fbb14..0136446d6d 100644 --- a/proxy/src/cache/endpoints.rs +++ b/proxy/src/cache/endpoints.rs @@ -12,6 +12,7 @@ use tracing::info; use crate::config::EndpointCacheConfig; use crate::context::RequestContext; +use crate::ext::LockExt; use crate::intern::{BranchIdInt, EndpointIdInt, ProjectIdInt}; use crate::metrics::{Metrics, RedisErrors, RedisEventsCount}; use crate::rate_limiter::GlobalRateLimiter; @@ -96,7 +97,7 @@ impl EndpointsCache { // If the limiter allows, we can pretend like it's valid // (incase it is, due to redis channel lag). - if self.limiter.lock().unwrap().check() { + if self.limiter.lock_propagate_poison().check() { return true; } @@ -258,6 +259,7 @@ impl EndpointsCache { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use super::*; diff --git a/proxy/src/cache/project_info.rs b/proxy/src/cache/project_info.rs index 84430dc812..cab0b8b905 100644 --- a/proxy/src/cache/project_info.rs +++ b/proxy/src/cache/project_info.rs @@ -365,6 +365,7 @@ impl Cache for ProjectInfoCacheImpl { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use super::*; use crate::scram::ServerSecret; diff --git a/proxy/src/cancellation.rs b/proxy/src/cancellation.rs index ed717507ee..dd3edd6abc 100644 --- a/proxy/src/cancellation.rs +++ b/proxy/src/cancellation.rs @@ -13,6 +13,7 @@ use uuid::Uuid; use crate::auth::{check_peer_addr_is_in_list, IpPattern}; use crate::error::ReportableError; +use crate::ext::LockExt; use crate::metrics::{CancellationRequest, CancellationSource, Metrics}; use crate::rate_limiter::LeakyBucketRateLimiter; use crate::redis::cancellation_publisher::{ @@ -114,7 +115,7 @@ impl CancellationHandler
{ IpAddr::V4(ip) => IpNet::V4(Ipv4Net::new_assert(ip, 24).trunc()), // use defaut mask here IpAddr::V6(ip) => IpNet::V6(Ipv6Net::new_assert(ip, 64).trunc()), }; - if !self.limiter.lock().unwrap().check(subnet_key, 1) { + if !self.limiter.lock_propagate_poison().check(subnet_key, 1) { // log only the subnet part of the IP address to know which subnet is rate limited tracing::warn!("Rate limit exceeded. Skipping cancellation message, {subnet_key}"); Metrics::get() @@ -283,6 +284,7 @@ impl
Drop for Session
{ } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use super::*; diff --git a/proxy/src/config.rs b/proxy/src/config.rs index 8bc8e3f96f..1f991d595e 100644 --- a/proxy/src/config.rs +++ b/proxy/src/config.rs @@ -221,15 +221,10 @@ impl CertResolver { ) -> anyhow::Result<()> { let priv_key = { let key_bytes = std::fs::read(key_path) - .context(format!("Failed to read TLS keys at '{key_path}'"))?; - let mut keys = rustls_pemfile::pkcs8_private_keys(&mut &key_bytes[..]).collect_vec(); - - ensure!(keys.len() == 1, "keys.len() = {} (should be 1)", keys.len()); - PrivateKeyDer::Pkcs8( - keys.pop() - .unwrap() - .context(format!("Failed to parse TLS keys at '{key_path}'"))?, - ) + .with_context(|| format!("Failed to read TLS keys at '{key_path}'"))?; + rustls_pemfile::private_key(&mut &key_bytes[..]) + .with_context(|| format!("Failed to parse TLS keys at '{key_path}'"))? + .with_context(|| format!("Failed to parse TLS keys at '{key_path}'"))? }; let cert_chain_bytes = std::fs::read(cert_path) diff --git a/proxy/src/context/parquet.rs b/proxy/src/context/parquet.rs index 3105d08526..5f65b17374 100644 --- a/proxy/src/context/parquet.rs +++ b/proxy/src/context/parquet.rs @@ -23,6 +23,7 @@ use utils::backoff; use super::{RequestContextInner, LOG_CHAN}; use crate::config::remote_storage_from_toml; use crate::context::LOG_CHAN_DISCONNECT; +use crate::ext::TaskExt; #[derive(clap::Args, Clone, Debug)] pub struct ParquetUploadArgs { @@ -171,7 +172,9 @@ pub async fn worker( }; let (tx, mut rx) = mpsc::unbounded_channel(); - LOG_CHAN.set(tx.downgrade()).unwrap(); + LOG_CHAN + .set(tx.downgrade()) + .expect("only one worker should set the channel"); // setup row stream that will close on cancellation let cancellation_token2 = cancellation_token.clone(); @@ -207,7 +210,9 @@ pub async fn worker( config.parquet_upload_disconnect_events_remote_storage { let (tx_disconnect, mut rx_disconnect) = mpsc::unbounded_channel(); - LOG_CHAN_DISCONNECT.set(tx_disconnect.downgrade()).unwrap(); + LOG_CHAN_DISCONNECT + .set(tx_disconnect.downgrade()) + .expect("only one worker should set the channel"); // setup row stream that will close on cancellation tokio::spawn(async move { @@ -326,7 +331,7 @@ where Ok::<_, parquet::errors::ParquetError>((rows, w, rg_meta)) }) .await - .unwrap()?; + .propagate_task_panic()?; rows.clear(); Ok((rows, w, rg_meta)) @@ -352,7 +357,7 @@ async fn upload_parquet( Ok((buffer, metadata)) }) .await - .unwrap()?; + .propagate_task_panic()?; let data = buffer.split().freeze(); @@ -409,6 +414,7 @@ async fn upload_parquet( } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use std::net::Ipv4Addr; use std::num::NonZeroUsize; diff --git a/proxy/src/control_plane/client/mock.rs b/proxy/src/control_plane/client/mock.rs index 93edd65476..5f8bda0f35 100644 --- a/proxy/src/control_plane/client/mock.rs +++ b/proxy/src/control_plane/client/mock.rs @@ -102,7 +102,9 @@ impl MockControlPlane { Some(s) => { info!("got allowed_ips: {s}"); s.split(',') - .map(|s| IpPattern::from_str(s).unwrap()) + .map(|s| { + IpPattern::from_str(s).expect("mocked ip pattern should be correct") + }) .collect() } None => vec![], diff --git a/proxy/src/ext.rs b/proxy/src/ext.rs new file mode 100644 index 0000000000..8d00afbf51 --- /dev/null +++ b/proxy/src/ext.rs @@ -0,0 +1,41 @@ +use std::panic::resume_unwind; +use std::sync::{Mutex, MutexGuard}; + +use tokio::task::JoinError; + +pub(crate) trait LockExt { + fn lock_propagate_poison(&self) -> MutexGuard<'_, T>; +} + +impl LockExt for Mutex { + /// Lock the mutex and 
panic if the mutex was poisoned. + #[track_caller] + fn lock_propagate_poison(&self) -> MutexGuard<'_, T> { + match self.lock() { + Ok(guard) => guard, + // poison occurs when another thread panicked while holding the lock guard. + // since panicking is often unrecoverable, propagating the poison panic is reasonable. + Err(poison) => panic!("{poison}"), + } + } +} + +pub(crate) trait TaskExt { + fn propagate_task_panic(self) -> T; +} + +impl TaskExt for Result { + /// Unwrap the result and panic if the inner task panicked. + /// Also panics if the task was cancelled + #[track_caller] + fn propagate_task_panic(self) -> T { + match self { + Ok(t) => t, + // Using resume_unwind prevents the panic hook being called twice. + // Since we use this for structured concurrency, there is only + // 1 logical panic, so this is more correct. + Err(e) if e.is_panic() => resume_unwind(e.into_panic()), + Err(e) => panic!("unexpected task error: {e}"), + } + } +} diff --git a/proxy/src/http/health_server.rs b/proxy/src/http/health_server.rs index 978ad9f761..6ca091feb7 100644 --- a/proxy/src/http/health_server.rs +++ b/proxy/src/http/health_server.rs @@ -14,6 +14,7 @@ use utils::http::error::ApiError; use utils::http::json::json_response; use utils::http::{RouterBuilder, RouterService}; +use crate::ext::{LockExt, TaskExt}; use crate::jemalloc; async fn status_handler(_: Request) -> Result, ApiError> { @@ -76,7 +77,7 @@ async fn prometheus_metrics_handler( let body = tokio::task::spawn_blocking(move || { let _span = span.entered(); - let mut state = state.lock().unwrap(); + let mut state = state.lock_propagate_poison(); let PrometheusHandler { encoder, metrics } = &mut *state; metrics @@ -94,13 +95,13 @@ async fn prometheus_metrics_handler( body }) .await - .unwrap(); + .propagate_task_panic(); let response = Response::builder() .status(200) .header(CONTENT_TYPE, "text/plain; version=0.0.4") .body(Body::from(body)) - .unwrap(); + .expect("response headers should be valid"); Ok(response) } diff --git a/proxy/src/intern.rs b/proxy/src/intern.rs index f56d92a6b3..79c6020302 100644 --- a/proxy/src/intern.rs +++ b/proxy/src/intern.rs @@ -83,7 +83,7 @@ impl StringInterner { pub(crate) fn new() -> Self { StringInterner { inner: ThreadedRodeo::with_capacity_memory_limits_and_hasher( - Capacity::new(2500, NonZeroUsize::new(1 << 16).unwrap()), + Capacity::new(2500, NonZeroUsize::new(1 << 16).expect("value is nonzero")), // unbounded MemoryLimits::for_memory_usage(usize::MAX), BuildHasherDefault::::default(), @@ -207,6 +207,7 @@ impl From for ProjectIdInt { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use std::sync::OnceLock; diff --git a/proxy/src/lib.rs b/proxy/src/lib.rs index ba69f9cf2d..a5a72f26d9 100644 --- a/proxy/src/lib.rs +++ b/proxy/src/lib.rs @@ -22,8 +22,8 @@ clippy::string_add, clippy::string_to_string, clippy::todo, - // TODO: consider clippy::unimplemented - // TODO: consider clippy::unwrap_used + clippy::unimplemented, + clippy::unwrap_used, )] // List of permanently allowed lints. 
#![allow( @@ -82,6 +82,7 @@ pub mod console_redirect_proxy; pub mod context; pub mod control_plane; pub mod error; +mod ext; pub mod http; pub mod intern; pub mod jemalloc; diff --git a/proxy/src/logging.rs b/proxy/src/logging.rs index 74d2b9a1d0..41f10f052f 100644 --- a/proxy/src/logging.rs +++ b/proxy/src/logging.rs @@ -18,8 +18,16 @@ pub async fn init() -> anyhow::Result { let env_filter = EnvFilter::builder() .with_default_directive(LevelFilter::INFO.into()) .from_env_lossy() - .add_directive("aws_config=info".parse().unwrap()) - .add_directive("azure_core::policies::transport=off".parse().unwrap()); + .add_directive( + "aws_config=info" + .parse() + .expect("this should be a valid filter directive"), + ) + .add_directive( + "azure_core::policies::transport=off" + .parse() + .expect("this should be a valid filter directive"), + ); let fmt_layer = tracing_subscriber::fmt::layer() .with_ansi(false) diff --git a/proxy/src/parse.rs b/proxy/src/parse.rs index 8c0f251066..095d6278cc 100644 --- a/proxy/src/parse.rs +++ b/proxy/src/parse.rs @@ -8,14 +8,6 @@ pub(crate) fn split_cstr(bytes: &[u8]) -> Option<(&CStr, &[u8])> { Some((cstr, other)) } -/// See . -pub(crate) fn split_at_const(bytes: &[u8]) -> Option<(&[u8; N], &[u8])> { - (bytes.len() >= N).then(|| { - let (head, tail) = bytes.split_at(N); - (head.try_into().unwrap(), tail) - }) -} - #[cfg(test)] mod tests { use super::*; @@ -33,11 +25,4 @@ mod tests { assert_eq!(cstr.to_bytes(), b"foo"); assert_eq!(rest, b"bar"); } - - #[test] - fn test_split_at_const() { - assert!(split_at_const::<0>(b"").is_some()); - assert!(split_at_const::<1>(b"").is_none()); - assert!(matches!(split_at_const::<1>(b"ok"), Some((b"o", b"k")))); - } } diff --git a/proxy/src/protocol2.rs b/proxy/src/protocol2.rs index 33a5eb5e1e..0dc97b7097 100644 --- a/proxy/src/protocol2.rs +++ b/proxy/src/protocol2.rs @@ -396,6 +396,7 @@ impl NetworkEndianIpv6 { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use tokio::io::AsyncReadExt; diff --git a/proxy/src/proxy/copy_bidirectional.rs b/proxy/src/proxy/copy_bidirectional.rs index 4e4af88634..3336a9556a 100644 --- a/proxy/src/proxy/copy_bidirectional.rs +++ b/proxy/src/proxy/copy_bidirectional.rs @@ -257,6 +257,7 @@ impl CopyBuffer { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use tokio::io::AsyncWriteExt; diff --git a/proxy/src/proxy/mod.rs b/proxy/src/proxy/mod.rs index 5db92d748a..4e5ecda237 100644 --- a/proxy/src/proxy/mod.rs +++ b/proxy/src/proxy/mod.rs @@ -488,7 +488,7 @@ impl NeonOptions { pub(crate) fn neon_option(bytes: &str) -> Option<(&str, &str)> { static RE: OnceCell = OnceCell::new(); - let re = RE.get_or_init(|| Regex::new(r"^neon_(\w+):(.+)").unwrap()); + let re = RE.get_or_init(|| Regex::new(r"^neon_(\w+):(.+)").expect("regex should be correct")); let cap = re.captures(bytes)?; let (_, [k, v]) = cap.extract(); diff --git a/proxy/src/proxy/tests/mod.rs b/proxy/src/proxy/tests/mod.rs index 3899ba4267..95c518fed9 100644 --- a/proxy/src/proxy/tests/mod.rs +++ b/proxy/src/proxy/tests/mod.rs @@ -1,4 +1,5 @@ //! A group of high-level tests for connection establishing logic and auth. 
+#![allow(clippy::unimplemented, clippy::unwrap_used)] mod mitm; diff --git a/proxy/src/rate_limiter/leaky_bucket.rs b/proxy/src/rate_limiter/leaky_bucket.rs index 45f9630dde..bff800f0a2 100644 --- a/proxy/src/rate_limiter/leaky_bucket.rs +++ b/proxy/src/rate_limiter/leaky_bucket.rs @@ -83,7 +83,7 @@ impl From for utils::leaky_bucket::LeakyBucketConfig { } #[cfg(test)] -#[allow(clippy::float_cmp)] +#[allow(clippy::float_cmp, clippy::unwrap_used)] mod tests { use std::time::Duration; diff --git a/proxy/src/rate_limiter/limit_algorithm/aimd.rs b/proxy/src/rate_limiter/limit_algorithm/aimd.rs index 3000cc4c2a..04e136b6d5 100644 --- a/proxy/src/rate_limiter/limit_algorithm/aimd.rs +++ b/proxy/src/rate_limiter/limit_algorithm/aimd.rs @@ -63,6 +63,7 @@ impl LimitAlgorithm for Aimd { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use std::time::Duration; diff --git a/proxy/src/rate_limiter/limiter.rs b/proxy/src/rate_limiter/limiter.rs index a048721e77..6f6a8c9d47 100644 --- a/proxy/src/rate_limiter/limiter.rs +++ b/proxy/src/rate_limiter/limiter.rs @@ -12,6 +12,7 @@ use rand::{Rng, SeedableRng}; use tokio::time::{Duration, Instant}; use tracing::info; +use crate::ext::LockExt; use crate::intern::EndpointIdInt; pub struct GlobalRateLimiter { @@ -246,12 +247,13 @@ impl BucketRateLimiter { let n = self.map.shards().len(); // this lock is ok as the periodic cycle of do_gc makes this very unlikely to collide // (impossible, infact, unless we have 2048 threads) - let shard = self.rand.lock().unwrap().gen_range(0..n); + let shard = self.rand.lock_propagate_poison().gen_range(0..n); self.map.shards()[shard].write().clear(); } } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use std::hash::BuildHasherDefault; use std::time::Duration; diff --git a/proxy/src/redis/connection_with_credentials_provider.rs b/proxy/src/redis/connection_with_credentials_provider.rs index 82139ea1d5..0f6e765b02 100644 --- a/proxy/src/redis/connection_with_credentials_provider.rs +++ b/proxy/src/redis/connection_with_credentials_provider.rs @@ -69,7 +69,11 @@ impl ConnectionWithCredentialsProvider { pub fn new_with_static_credentials(params: T) -> Self { Self { - credentials: Credentials::Static(params.into_connection_info().unwrap()), + credentials: Credentials::Static( + params + .into_connection_info() + .expect("static configured redis credentials should be a valid format"), + ), con: None, refresh_token_task: None, mutex: tokio::sync::Mutex::new(()), diff --git a/proxy/src/sasl/messages.rs b/proxy/src/sasl/messages.rs index 1373dfba3d..4922ece615 100644 --- a/proxy/src/sasl/messages.rs +++ b/proxy/src/sasl/messages.rs @@ -2,7 +2,7 @@ use pq_proto::{BeAuthenticationSaslMessage, BeMessage}; -use crate::parse::{split_at_const, split_cstr}; +use crate::parse::split_cstr; /// SASL-specific payload of [`PasswordMessage`](pq_proto::FeMessage::PasswordMessage). 
#[derive(Debug)] @@ -19,7 +19,7 @@ impl<'a> FirstMessage<'a> { let (method_cstr, tail) = split_cstr(bytes)?; let method = method_cstr.to_str().ok()?; - let (len_bytes, bytes) = split_at_const(tail)?; + let (len_bytes, bytes) = tail.split_first_chunk()?; let len = u32::from_be_bytes(*len_bytes) as usize; if len != bytes.len() { return None; @@ -51,6 +51,7 @@ impl<'a> ServerMessage<&'a str> { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use super::*; diff --git a/proxy/src/scram/messages.rs b/proxy/src/scram/messages.rs index 5ee3a51352..0e54e7ded9 100644 --- a/proxy/src/scram/messages.rs +++ b/proxy/src/scram/messages.rs @@ -185,6 +185,7 @@ impl fmt::Debug for OwnedServerFirstMessage { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use super::*; diff --git a/proxy/src/scram/mod.rs b/proxy/src/scram/mod.rs index 718445f61d..b49a9f32ee 100644 --- a/proxy/src/scram/mod.rs +++ b/proxy/src/scram/mod.rs @@ -57,6 +57,7 @@ fn sha256<'a>(parts: impl IntoIterator) -> [u8; 32] { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use super::threadpool::ThreadPool; use super::{Exchange, ServerSecret}; diff --git a/proxy/src/scram/secret.rs b/proxy/src/scram/secret.rs index 8c6a08d432..eb21b26ab4 100644 --- a/proxy/src/scram/secret.rs +++ b/proxy/src/scram/secret.rs @@ -72,6 +72,7 @@ impl ServerSecret { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use super::*; diff --git a/proxy/src/scram/threadpool.rs b/proxy/src/scram/threadpool.rs index ebc6dd2a3c..8f1684c75b 100644 --- a/proxy/src/scram/threadpool.rs +++ b/proxy/src/scram/threadpool.rs @@ -33,14 +33,11 @@ thread_local! { } impl ThreadPool { - pub fn new(n_workers: u8) -> Arc { + pub fn new(mut n_workers: u8) -> Arc { // rayon would be nice here, but yielding in rayon does not work well afaict. if n_workers == 0 { - return Arc::new(Self { - runtime: None, - metrics: Arc::new(ThreadPoolMetrics::new(n_workers as usize)), - }); + n_workers = 1; } Arc::new_cyclic(|pool| { @@ -66,7 +63,7 @@ impl ThreadPool { }); }) .build() - .unwrap(); + .expect("password threadpool runtime should be configured correctly"); Self { runtime: Some(runtime), @@ -79,7 +76,7 @@ impl ThreadPool { JobHandle( self.runtime .as_ref() - .unwrap() + .expect("runtime is always set") .spawn(JobSpec { pbkdf2, endpoint }), ) } @@ -87,7 +84,10 @@ impl ThreadPool { impl Drop for ThreadPool { fn drop(&mut self) { - self.runtime.take().unwrap().shutdown_background(); + self.runtime + .take() + .expect("runtime is always set") + .shutdown_background(); } } diff --git a/proxy/src/serverless/backend.rs b/proxy/src/serverless/backend.rs index 15d883bdb0..449d50b6e7 100644 --- a/proxy/src/serverless/backend.rs +++ b/proxy/src/serverless/backend.rs @@ -268,7 +268,11 @@ impl PoolingBackend { if !self.local_pool.initialized(&conn_info) { // only install and grant usage one at a time. 
- let _permit = local_backend.initialize.acquire().await.unwrap(); + let _permit = local_backend + .initialize + .acquire() + .await + .expect("semaphore should never be closed"); // check again for race if !self.local_pool.initialized(&conn_info) { diff --git a/proxy/src/serverless/conn_pool.rs b/proxy/src/serverless/conn_pool.rs index cac5a173cb..447103edce 100644 --- a/proxy/src/serverless/conn_pool.rs +++ b/proxy/src/serverless/conn_pool.rs @@ -186,8 +186,8 @@ impl ClientDataRemote { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { - use std::mem; use std::sync::atomic::AtomicBool; use super::*; @@ -269,39 +269,33 @@ mod tests { assert_eq!(0, pool.get_global_connections_count()); } { - let mut client = Client::new(create_inner(), conn_info.clone(), ep_pool.clone()); - client.do_drop().unwrap()(); - mem::forget(client); // drop the client + let client = Client::new(create_inner(), conn_info.clone(), ep_pool.clone()); + drop(client); assert_eq!(1, pool.get_global_connections_count()); } { - let mut closed_client = Client::new( + let closed_client = Client::new( create_inner_with(MockClient::new(true)), conn_info.clone(), ep_pool.clone(), ); - closed_client.do_drop().unwrap()(); - mem::forget(closed_client); // drop the client - // The closed client shouldn't be added to the pool. + drop(closed_client); assert_eq!(1, pool.get_global_connections_count()); } let is_closed: Arc = Arc::new(false.into()); { - let mut client = Client::new( + let client = Client::new( create_inner_with(MockClient(is_closed.clone())), conn_info.clone(), ep_pool.clone(), ); - client.do_drop().unwrap()(); - mem::forget(client); // drop the client - + drop(client); // The client should be added to the pool. assert_eq!(2, pool.get_global_connections_count()); } { - let mut client = Client::new(create_inner(), conn_info, ep_pool); - client.do_drop().unwrap()(); - mem::forget(client); // drop the client + let client = Client::new(create_inner(), conn_info, ep_pool); + drop(client); // The client shouldn't be added to the pool. Because the ep-pool is full. assert_eq!(2, pool.get_global_connections_count()); @@ -319,15 +313,13 @@ mod tests { &pool.get_or_create_endpoint_pool(&conn_info.endpoint_cache_key().unwrap()), ); { - let mut client = Client::new(create_inner(), conn_info.clone(), ep_pool.clone()); - client.do_drop().unwrap()(); - mem::forget(client); // drop the client + let client = Client::new(create_inner(), conn_info.clone(), ep_pool.clone()); + drop(client); assert_eq!(3, pool.get_global_connections_count()); } { - let mut client = Client::new(create_inner(), conn_info.clone(), ep_pool.clone()); - client.do_drop().unwrap()(); - mem::forget(client); // drop the client + let client = Client::new(create_inner(), conn_info.clone(), ep_pool.clone()); + drop(client); // The client shouldn't be added to the pool. Because the global pool is full. 
assert_eq!(3, pool.get_global_connections_count()); diff --git a/proxy/src/serverless/conn_pool_lib.rs b/proxy/src/serverless/conn_pool_lib.rs index 2a46c8f9c5..44eac77e8f 100644 --- a/proxy/src/serverless/conn_pool_lib.rs +++ b/proxy/src/serverless/conn_pool_lib.rs @@ -187,19 +187,22 @@ impl EndpointConnPool { pub(crate) fn put(pool: &RwLock, conn_info: &ConnInfo, client: ClientInnerCommon) { let conn_id = client.get_conn_id(); - let pool_name = pool.read().get_name().to_string(); + let (max_conn, conn_count, pool_name) = { + let pool = pool.read(); + ( + pool.global_pool_size_max_conns, + pool.global_connections_count + .load(atomic::Ordering::Relaxed), + pool.get_name().to_string(), + ) + }; + if client.inner.is_closed() { info!(%conn_id, "{}: throwing away connection '{conn_info}' because connection is closed", pool_name); return; } - let global_max_conn = pool.read().global_pool_size_max_conns; - if pool - .read() - .global_connections_count - .load(atomic::Ordering::Relaxed) - >= global_max_conn - { + if conn_count >= max_conn { info!(%conn_id, "{}: throwing away connection '{conn_info}' because pool is full", pool_name); return; } @@ -633,35 +636,29 @@ impl Client { } pub(crate) fn metrics(&self) -> Arc { - let aux = &self.inner.as_ref().unwrap().aux; + let aux = &self + .inner + .as_ref() + .expect("client inner should not be removed") + .aux; USAGE_METRICS.register(Ids { endpoint_id: aux.endpoint_id, branch_id: aux.branch_id, }) } +} - pub(crate) fn do_drop(&mut self) -> Option> { +impl Drop for Client { + fn drop(&mut self) { let conn_info = self.conn_info.clone(); let client = self .inner .take() .expect("client inner should not be removed"); if let Some(conn_pool) = std::mem::take(&mut self.pool).upgrade() { - let current_span = self.span.clone(); + let _current_span = self.span.enter(); // return connection to the pool - return Some(move || { - let _span = current_span.enter(); - EndpointConnPool::put(&conn_pool, &conn_info, client); - }); - } - None - } -} - -impl Drop for Client { - fn drop(&mut self) { - if let Some(drop) = self.do_drop() { - tokio::task::spawn_blocking(drop); + EndpointConnPool::put(&conn_pool, &conn_info, client); } } } diff --git a/proxy/src/serverless/http_util.rs b/proxy/src/serverless/http_util.rs index c0208d4f68..d5c948777c 100644 --- a/proxy/src/serverless/http_util.rs +++ b/proxy/src/serverless/http_util.rs @@ -81,11 +81,14 @@ impl HttpErrorBody { .header(http::header::CONTENT_TYPE, "application/json") // we do not have nested maps with non string keys so serialization shouldn't fail .body( - Full::new(Bytes::from(serde_json::to_string(self).unwrap())) - .map_err(|x| match x {}) - .boxed(), + Full::new(Bytes::from( + serde_json::to_string(self) + .expect("serialising HttpErrorBody should never fail"), + )) + .map_err(|x| match x {}) + .boxed(), ) - .unwrap() + .expect("content-type header should be valid") } } diff --git a/proxy/src/serverless/json.rs b/proxy/src/serverless/json.rs index 25b25c66d3..ab012bd020 100644 --- a/proxy/src/serverless/json.rs +++ b/proxy/src/serverless/json.rs @@ -204,7 +204,10 @@ fn pg_array_parse_inner( if c == '\\' { escaped = true; - (i, c) = pg_array_chr.next().unwrap(); + let Some(x) = pg_array_chr.next() else { + return Err(JsonConversionError::UnbalancedArray); + }; + (i, c) = x; } match c { @@ -253,6 +256,7 @@ fn pg_array_parse_inner( } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use serde_json::json; diff --git a/proxy/src/serverless/local_conn_pool.rs b/proxy/src/serverless/local_conn_pool.rs index 
b84cde9e25..c51a2bc9ba 100644 --- a/proxy/src/serverless/local_conn_pool.rs +++ b/proxy/src/serverless/local_conn_pool.rs @@ -179,7 +179,6 @@ pub(crate) fn poll_client( info!(cold_start_info = cold_start_info.as_str(), %conn_info, %session_id, "new connection"); }); let pool = Arc::downgrade(&global_pool); - let pool_clone = pool.clone(); let db_user = conn_info.db_and_user(); let idle = global_pool.get_idle_timeout(); @@ -273,11 +272,7 @@ pub(crate) fn poll_client( }), }; - Client::new( - inner, - conn_info, - Arc::downgrade(&pool_clone.upgrade().unwrap().global_pool), - ) + Client::new(inner, conn_info, Arc::downgrade(&global_pool.global_pool)) } impl ClientInnerCommon { @@ -321,7 +316,8 @@ fn resign_jwt(sk: &SigningKey, payload: &[u8], jti: u64) -> Result(buffer.format(jti)).unwrap(); + let jti = serde_json::from_str::<&RawValue>(buffer.format(jti)) + .expect("itoa formatted integer should be guaranteed valid json"); // update the jti in-place let payload = @@ -368,6 +364,7 @@ fn sign_jwt(sk: &SigningKey, payload: &[u8]) -> String { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use p256::ecdsa::SigningKey; use typed_json::json; diff --git a/proxy/src/serverless/mod.rs b/proxy/src/serverless/mod.rs index 80b42f9e55..c2623e0eca 100644 --- a/proxy/src/serverless/mod.rs +++ b/proxy/src/serverless/mod.rs @@ -46,6 +46,7 @@ use utils::http::error::ApiError; use crate::cancellation::CancellationHandlerMain; use crate::config::{ProxyConfig, ProxyProtocolV2}; use crate::context::RequestContext; +use crate::ext::TaskExt; use crate::metrics::Metrics; use crate::protocol2::{read_proxy_protocol, ChainRW, ConnectHeader, ConnectionInfo}; use crate::proxy::run_until_cancelled; @@ -84,7 +85,7 @@ pub async fn task_main( cancellation_token.cancelled().await; tokio::task::spawn_blocking(move || conn_pool.shutdown()) .await - .unwrap(); + .propagate_task_panic(); } }); @@ -104,7 +105,7 @@ pub async fn task_main( cancellation_token.cancelled().await; tokio::task::spawn_blocking(move || http_conn_pool.shutdown()) .await - .unwrap(); + .propagate_task_panic(); } }); diff --git a/proxy/src/serverless/sql_over_http.rs b/proxy/src/serverless/sql_over_http.rs index 5e85f5ec40..3e42787a09 100644 --- a/proxy/src/serverless/sql_over_http.rs +++ b/proxy/src/serverless/sql_over_http.rs @@ -1110,6 +1110,7 @@ impl Discard<'_> { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use super::*; diff --git a/proxy/src/serverless/websocket.rs b/proxy/src/serverless/websocket.rs index bdb83fe6be..812fedaf04 100644 --- a/proxy/src/serverless/websocket.rs +++ b/proxy/src/serverless/websocket.rs @@ -178,6 +178,7 @@ pub(crate) async fn serve_websocket( } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use std::pin::pin; diff --git a/proxy/src/url.rs b/proxy/src/url.rs index 270cd7c24d..d73a84057a 100644 --- a/proxy/src/url.rs +++ b/proxy/src/url.rs @@ -50,6 +50,7 @@ impl std::fmt::Display for ApiUrl { } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use super::*; diff --git a/proxy/src/usage_metrics.rs b/proxy/src/usage_metrics.rs index 65e74466f2..487504d709 100644 --- a/proxy/src/usage_metrics.rs +++ b/proxy/src/usage_metrics.rs @@ -407,6 +407,7 @@ async fn upload_backup_events( } #[cfg(test)] +#[expect(clippy::unwrap_used)] mod tests { use std::fs; use std::io::BufReader; From 28ccda0a63f9661d961dbb1e5372f4a78d80346d Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Mon, 16 Dec 2024 18:10:55 +0100 Subject: [PATCH 22/89] test_runner: ignore error in `test_timeline_archival_chaos` (#10161) 
Resolves #10159. --- test_runner/regress/test_timeline_archive.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test_runner/regress/test_timeline_archive.py b/test_runner/regress/test_timeline_archive.py index addf702893..87579f9e92 100644 --- a/test_runner/regress/test_timeline_archive.py +++ b/test_runner/regress/test_timeline_archive.py @@ -426,6 +426,7 @@ def test_timeline_archival_chaos(neon_env_builder: NeonEnvBuilder): [ ".*removing local file.*because it has unexpected length.*", ".*__temp.*", + ".*method=POST path=\\S+/timeline .*: Not activating a Stopping timeline.*", # FIXME: there are still anyhow::Error paths in timeline creation/deletion which # generate 500 results when called during shutdown (https://github.com/neondatabase/neon/issues/9768) ".*InternalServerError.*", From aa7ab9b3aca4c399e6caa5fd8fb31e6f0a7723b0 Mon Sep 17 00:00:00 2001 From: Folke Behrens Date: Mon, 16 Dec 2024 19:56:24 +0100 Subject: [PATCH 23/89] proxy: Allow dumping TLS session keys for debugging (#10163) ## Problem To debug issues with TLS connections there's no easy way to decrypt packets unless a client has special support for logging the keys. ## Summary of changes Add TLS session keys logging to proxy via `SSLKEYLOGFILE` env var gated by flag. --- proxy/src/bin/proxy.rs | 4 ++++ proxy/src/config.rs | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/proxy/src/bin/proxy.rs b/proxy/src/bin/proxy.rs index 97c4037009..e90555e250 100644 --- a/proxy/src/bin/proxy.rs +++ b/proxy/src/bin/proxy.rs @@ -105,6 +105,9 @@ struct ProxyCliArgs { /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir #[clap(short = 'c', long, alias = "ssl-cert")] tls_cert: Option, + /// Allow writing TLS session keys to the given file pointed to by the environment variable `SSLKEYLOGFILE`. + #[clap(long, alias = "allow-ssl-keylogfile")] + allow_tls_keylogfile: bool, /// path to directory with TLS certificates for client postgres connections #[clap(long)] certs_dir: Option, @@ -555,6 +558,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> { key_path, cert_path, args.certs_dir.as_ref(), + args.allow_tls_keylogfile, )?), (None, None) => None, _ => bail!("either both or neither tls-key and tls-cert must be specified"), diff --git a/proxy/src/config.rs b/proxy/src/config.rs index 1f991d595e..debd77ac32 100644 --- a/proxy/src/config.rs +++ b/proxy/src/config.rs @@ -95,6 +95,7 @@ pub fn configure_tls( key_path: &str, cert_path: &str, certs_dir: Option<&String>, + allow_tls_keylogfile: bool, ) -> anyhow::Result { let mut cert_resolver = CertResolver::new(); @@ -135,6 +136,11 @@ pub fn configure_tls( config.alpn_protocols = vec![PG_ALPN_PROTOCOL.to_vec()]; + if allow_tls_keylogfile { + // KeyLogFile will check for the SSLKEYLOGFILE environment variable. + config.key_log = Arc::new(rustls::KeyLogFile::new()); + } + Ok(TlsConfig { config: Arc::new(config), common_names, From e226d7a3d1901c1a89f640c8d95b2efd057628b1 Mon Sep 17 00:00:00 2001 From: a-masterov <72613290+a-masterov@users.noreply.github.com> Date: Tue, 17 Dec 2024 09:16:54 +0100 Subject: [PATCH 24/89] Fix docker compose with PG17 (#10165) ## Problem It's impossible to run docker compose with compute v17 due to `pg_anon` extension which is not supported under PG17. 
## Summary of changes The auto-loading of `pg_anon` is disabled by default --- .../compute_wrapper/var/db/postgres/specs/spec.json | 5 ----- docker-compose/docker_compose_test.sh | 12 ++++++------ 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json b/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json index 8e582e74e1..0308cab451 100644 --- a/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json +++ b/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json @@ -132,11 +132,6 @@ "name": "cron.database", "value": "postgres", "vartype": "string" - }, - { - "name": "session_preload_libraries", - "value": "anon", - "vartype": "string" } ] }, diff --git a/docker-compose/docker_compose_test.sh b/docker-compose/docker_compose_test.sh index c97dfaa901..063664d0c6 100755 --- a/docker-compose/docker_compose_test.sh +++ b/docker-compose/docker_compose_test.sh @@ -35,11 +35,11 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do echo "clean up containers if exists" cleanup PG_TEST_VERSION=$((pg_version < 16 ? 16 : pg_version)) - # The support of pg_anon not yet added to PG17, so we have to remove the corresponding option - if [ $pg_version -eq 17 ]; then + # The support of pg_anon not yet added to PG17, so we have to add the corresponding option for other PG versions + if [ "${pg_version}" -ne 17 ]; then SPEC_PATH="compute_wrapper/var/db/postgres/specs" mv $SPEC_PATH/spec.json $SPEC_PATH/spec.bak - jq 'del(.cluster.settings[] | select (.name == "session_preload_libraries"))' $SPEC_PATH/spec.bak > $SPEC_PATH/spec.json + jq '.cluster.settings += [{"name": "session_preload_libraries","value": "anon","vartype": "string"}]' "${SPEC_PATH}/spec.bak" > "${SPEC_PATH}/spec.json" fi PG_VERSION=$pg_version PG_TEST_VERSION=$PG_TEST_VERSION docker compose --profile test-extensions -f $COMPOSE_FILE up --build -d @@ -106,8 +106,8 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do fi fi cleanup - # The support of pg_anon not yet added to PG17, so we have to remove the corresponding option - if [ $pg_version -eq 17 ]; then - mv $SPEC_PATH/spec.bak $SPEC_PATH/spec.json + # Restore the original spec.json + if [ "$pg_version" -ne 17 ]; then + mv "$SPEC_PATH/spec.bak" "$SPEC_PATH/spec.json" fi done From b0e43c2f88bf81473c931e3dcf50a22d62f3ebd4 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Tue, 17 Dec 2024 11:35:00 +0100 Subject: [PATCH 25/89] postgres_ffi: add `WalStreamDecoder::complete_record()` benchmark (#10158) Touches #10097. 
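For reference, the new benchmark can be run on its own with `cargo bench --package postgres_ffi --bench waldecoder complete_record/size=1024`, as documented in the README added below.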
--- Cargo.lock | 2 + libs/postgres_ffi/Cargo.toml | 6 +++ libs/postgres_ffi/benches/README.md | 26 +++++++++++++ libs/postgres_ffi/benches/waldecoder.rs | 49 +++++++++++++++++++++++++ libs/postgres_ffi/src/wal_generator.rs | 16 ++++++++ 5 files changed, 99 insertions(+) create mode 100644 libs/postgres_ffi/benches/README.md create mode 100644 libs/postgres_ffi/benches/waldecoder.rs diff --git a/Cargo.lock b/Cargo.lock index d1f7746969..5ec5253719 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4401,11 +4401,13 @@ dependencies = [ "bindgen", "bytes", "crc32c", + "criterion", "env_logger", "log", "memoffset 0.9.0", "once_cell", "postgres", + "pprof", "regex", "serde", "thiserror", diff --git a/libs/postgres_ffi/Cargo.toml b/libs/postgres_ffi/Cargo.toml index e1f5443cbe..b7a376841d 100644 --- a/libs/postgres_ffi/Cargo.toml +++ b/libs/postgres_ffi/Cargo.toml @@ -9,9 +9,11 @@ regex.workspace = true bytes.workspace = true anyhow.workspace = true crc32c.workspace = true +criterion.workspace = true once_cell.workspace = true log.workspace = true memoffset.workspace = true +pprof.workspace = true thiserror.workspace = true serde.workspace = true utils.workspace = true @@ -24,3 +26,7 @@ postgres.workspace = true [build-dependencies] anyhow.workspace = true bindgen.workspace = true + +[[bench]] +name = "waldecoder" +harness = false diff --git a/libs/postgres_ffi/benches/README.md b/libs/postgres_ffi/benches/README.md new file mode 100644 index 0000000000..00a8980174 --- /dev/null +++ b/libs/postgres_ffi/benches/README.md @@ -0,0 +1,26 @@ +## Benchmarks + +To run benchmarks: + +```sh +# All benchmarks. +cargo bench --package postgres_ffi + +# Specific file. +cargo bench --package postgres_ffi --bench waldecoder + +# Specific benchmark. +cargo bench --package postgres_ffi --bench waldecoder complete_record/size=1024 + +# List available benchmarks. +cargo bench --package postgres_ffi --benches -- --list + +# Generate flamegraph profiles using pprof-rs, profiling for 10 seconds. +# Output in target/criterion/*/profile/flamegraph.svg. +cargo bench --package postgres_ffi --bench waldecoder complete_record/size=1024 -- --profile-time 10 +``` + +Additional charts and statistics are available in `target/criterion/report/index.html`. + +Benchmarks are automatically compared against the previous run. To compare against other runs, see +`--baseline` and `--save-baseline`. \ No newline at end of file diff --git a/libs/postgres_ffi/benches/waldecoder.rs b/libs/postgres_ffi/benches/waldecoder.rs new file mode 100644 index 0000000000..c8cf0d322a --- /dev/null +++ b/libs/postgres_ffi/benches/waldecoder.rs @@ -0,0 +1,49 @@ +use std::ffi::CStr; + +use criterion::{criterion_group, criterion_main, Bencher, Criterion}; +use postgres_ffi::v17::wal_generator::LogicalMessageGenerator; +use postgres_ffi::v17::waldecoder_handler::WalStreamDecoderHandler; +use postgres_ffi::waldecoder::WalStreamDecoder; +use pprof::criterion::{Output, PProfProfiler}; +use utils::lsn::Lsn; + +const KB: usize = 1024; + +// Register benchmarks with Criterion. +criterion_group!( + name = benches; + config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None))); + targets = bench_complete_record, +); +criterion_main!(benches); + +/// Benchmarks WalStreamDecoder::complete_record() for a logical message of varying size. 
+fn bench_complete_record(c: &mut Criterion) { + let mut g = c.benchmark_group("complete_record"); + for size in [64, KB, 8 * KB, 128 * KB] { + // Kind of weird to change the group throughput per benchmark, but it's the only way + // to vary it per benchmark. It works. + g.throughput(criterion::Throughput::Bytes(size as u64)); + g.bench_function(format!("size={size}"), |b| run_bench(b, size).unwrap()); + } + + fn run_bench(b: &mut Bencher, size: usize) -> anyhow::Result<()> { + const PREFIX: &CStr = c""; + let value_size = LogicalMessageGenerator::make_value_size(size, PREFIX); + let value = vec![1; value_size]; + + let mut decoder = WalStreamDecoder::new(Lsn(0), 170000); + let msg = LogicalMessageGenerator::new(PREFIX, &value) + .next() + .unwrap() + .encode(Lsn(0)); + assert_eq!(msg.len(), size); + + b.iter(|| { + let msg = msg.clone(); // Bytes::clone() is cheap + decoder.complete_record(msg).unwrap(); + }); + + Ok(()) + } +} diff --git a/libs/postgres_ffi/src/wal_generator.rs b/libs/postgres_ffi/src/wal_generator.rs index dc679eea33..69cc4b771f 100644 --- a/libs/postgres_ffi/src/wal_generator.rs +++ b/libs/postgres_ffi/src/wal_generator.rs @@ -231,6 +231,22 @@ impl LogicalMessageGenerator { }; [&header.encode(), prefix, message].concat().into() } + + /// Computes how large a value must be to get a record of the given size. Convenience method to + /// construct records of pre-determined size. Panics if the record size is too small. + pub fn make_value_size(record_size: usize, prefix: &CStr) -> usize { + let xlog_header_size = XLOG_SIZE_OF_XLOG_RECORD; + let lm_header_size = size_of::(); + let prefix_size = prefix.to_bytes_with_nul().len(); + let data_header_size = match record_size - xlog_header_size - 2 { + 0..=255 => 2, + 256..=258 => panic!("impossible record_size {record_size}"), + 259.. => 5, + }; + record_size + .checked_sub(xlog_header_size + lm_header_size + prefix_size + data_header_size) + .expect("record_size too small") + } } impl Iterator for LogicalMessageGenerator { From b5833ef2594b39b6616526b60cbc8c506e5870fc Mon Sep 17 00:00:00 2001 From: John Spray Date: Tue, 17 Dec 2024 12:24:51 +0000 Subject: [PATCH 26/89] remote_storage: configurable connection pooling for ABS (#10169) ## Problem The ABS SDK's default behavior is to do no connection pooling, i.e. open and close a fresh connection for each request. Under high request rates, this can result in an accumulation of TCP connections in TIME_WAIT or CLOSE_WAIT state, and in extreme cases exhaustion of client ports. Related: https://github.com/neondatabase/cloud/issues/20971 ## Summary of changes - Add a configurable `conn_pool_size` parameter for Azure storage, defaulting to zero (current behavior) - Construct a custom reqwest client using this connection pool size. 
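For illustration, a minimal sketch of the Azure storage TOML with the new knob set, mirroring the fixture used in the config parsing test below (the other fields come from that test; omitting `conn_pool_size` keeps the default of `0`, i.e. no pooling):

```toml
container_name = 'some-container-name'
container_region = 'westeurope'
upload_storage_class = 'INTELLIGENT_TIERING'
timeout = '7s'
# Keep up to 8 idle connections per host alive for reuse instead of
# opening and closing a TCP connection for every request.
conn_pool_size = 8
```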
--- Cargo.lock | 1 + libs/remote_storage/Cargo.toml | 1 + libs/remote_storage/src/azure_blob.rs | 20 ++++++++++++++++++-- libs/remote_storage/src/config.rs | 14 ++++++++++++++ libs/remote_storage/tests/test_real_azure.rs | 1 + 5 files changed, 35 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5ec5253719..b9b89efa02 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5064,6 +5064,7 @@ dependencies = [ "once_cell", "pin-project-lite", "rand 0.8.5", + "reqwest", "scopeguard", "serde", "serde_json", diff --git a/libs/remote_storage/Cargo.toml b/libs/remote_storage/Cargo.toml index 1816825bda..33fa6e89f5 100644 --- a/libs/remote_storage/Cargo.toml +++ b/libs/remote_storage/Cargo.toml @@ -18,6 +18,7 @@ camino = { workspace = true, features = ["serde1"] } humantime-serde.workspace = true hyper = { workspace = true, features = ["client"] } futures.workspace = true +reqwest.workspace = true serde.workspace = true serde_json.workspace = true tokio = { workspace = true, features = ["sync", "fs", "io-util"] } diff --git a/libs/remote_storage/src/azure_blob.rs b/libs/remote_storage/src/azure_blob.rs index 19c8251ccd..c89f50ef2b 100644 --- a/libs/remote_storage/src/azure_blob.rs +++ b/libs/remote_storage/src/azure_blob.rs @@ -8,6 +8,7 @@ use std::io; use std::num::NonZeroU32; use std::pin::Pin; use std::str::FromStr; +use std::sync::Arc; use std::time::Duration; use std::time::SystemTime; @@ -15,6 +16,8 @@ use super::REMOTE_STORAGE_PREFIX_SEPARATOR; use anyhow::Context; use anyhow::Result; use azure_core::request_options::{IfMatchCondition, MaxResults, Metadata, Range}; +use azure_core::HttpClient; +use azure_core::TransportOptions; use azure_core::{Continuable, RetryOptions}; use azure_storage::StorageCredentials; use azure_storage_blobs::blob::CopyStatus; @@ -80,8 +83,13 @@ impl AzureBlobStorage { StorageCredentials::token_credential(token_credential) }; - // we have an outer retry - let builder = ClientBuilder::new(account, credentials).retry(RetryOptions::none()); + let builder = ClientBuilder::new(account, credentials) + // we have an outer retry + .retry(RetryOptions::none()) + // Customize transport to configure conneciton pooling + .transport(TransportOptions::new(Self::reqwest_client( + azure_config.conn_pool_size, + ))); let client = builder.container_client(azure_config.container_name.to_owned()); @@ -106,6 +114,14 @@ impl AzureBlobStorage { }) } + fn reqwest_client(conn_pool_size: usize) -> Arc { + let client = reqwest::ClientBuilder::new() + .pool_max_idle_per_host(conn_pool_size) + .build() + .expect("failed to build `reqwest` client"); + Arc::new(client) + } + pub fn relative_path_to_name(&self, path: &RemotePath) -> String { assert_eq!(std::path::MAIN_SEPARATOR, REMOTE_STORAGE_PREFIX_SEPARATOR); let path_string = path.get_path().as_str(); diff --git a/libs/remote_storage/src/config.rs b/libs/remote_storage/src/config.rs index f6ef31077c..dd49d4d5e7 100644 --- a/libs/remote_storage/src/config.rs +++ b/libs/remote_storage/src/config.rs @@ -114,6 +114,16 @@ fn default_max_keys_per_list_response() -> Option { DEFAULT_MAX_KEYS_PER_LIST_RESPONSE } +fn default_azure_conn_pool_size() -> usize { + // Conservative default: no connection pooling. 
At time of writing this is the Azure + // SDK's default as well, due to historic reports of hard-to-reproduce issues + // (https://github.com/hyperium/hyper/issues/2312) + // + // However, using connection pooling is important to avoid exhausting client ports when + // doing huge numbers of requests (https://github.com/neondatabase/cloud/issues/20971) + 0 +} + impl Debug for S3Config { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("S3Config") @@ -146,6 +156,8 @@ pub struct AzureConfig { pub concurrency_limit: NonZeroUsize, #[serde(default = "default_max_keys_per_list_response")] pub max_keys_per_list_response: Option, + #[serde(default = "default_azure_conn_pool_size")] + pub conn_pool_size: usize, } fn default_remote_storage_azure_concurrency_limit() -> NonZeroUsize { @@ -302,6 +314,7 @@ timeout = '5s'"; container_region = 'westeurope' upload_storage_class = 'INTELLIGENT_TIERING' timeout = '7s' + conn_pool_size = 8 "; let config = parse(toml).unwrap(); @@ -316,6 +329,7 @@ timeout = '5s'"; prefix_in_container: None, concurrency_limit: default_remote_storage_azure_concurrency_limit(), max_keys_per_list_response: DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, + conn_pool_size: 8, }), timeout: Duration::from_secs(7), small_timeout: RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT diff --git a/libs/remote_storage/tests/test_real_azure.rs b/libs/remote_storage/tests/test_real_azure.rs index 92d579fec8..15004dbf83 100644 --- a/libs/remote_storage/tests/test_real_azure.rs +++ b/libs/remote_storage/tests/test_real_azure.rs @@ -218,6 +218,7 @@ async fn create_azure_client( prefix_in_container: Some(format!("test_{millis}_{random:08x}/")), concurrency_limit: NonZeroUsize::new(100).unwrap(), max_keys_per_list_response, + conn_pool_size: 8, }), timeout: RemoteStorageConfig::DEFAULT_TIMEOUT, small_timeout: RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT, From 2dfd3cab8cd7840afee57acffaa45aa803d04f20 Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Tue, 17 Dec 2024 17:14:07 +0100 Subject: [PATCH 27/89] fix(compute): Report compute_backpressure_throttling_seconds as counter (#10125) ## Problem It was reported as `gauge`, but it's actually a `counter`. Also add `_total` suffix as that's the convention for counters. 
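Since the metric is now a counter, dashboards and alerts will typically want to consume it via `rate()`/`increase()` (for example `rate(compute_backpressure_throttling_seconds_total[5m])`) rather than graphing the raw value, which only ever increases.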
The corresponding flux-fleet PR: https://github.com/neondatabase/flux-fleet/pull/386 --- compute/etc/neon_collector.jsonnet | 2 +- ...compute_backpressure_throttling_seconds_total.libsonnet} | 6 +++--- ...ql => compute_backpressure_throttling_seconds_total.sql} | 0 3 files changed, 4 insertions(+), 4 deletions(-) rename compute/etc/sql_exporter/{compute_backpressure_throttling_seconds.libsonnet => compute_backpressure_throttling_seconds_total.libsonnet} (61%) rename compute/etc/sql_exporter/{compute_backpressure_throttling_seconds.sql => compute_backpressure_throttling_seconds_total.sql} (100%) diff --git a/compute/etc/neon_collector.jsonnet b/compute/etc/neon_collector.jsonnet index aa6cc1cfc8..f8f4cab63b 100644 --- a/compute/etc/neon_collector.jsonnet +++ b/compute/etc/neon_collector.jsonnet @@ -3,7 +3,7 @@ metrics: [ import 'sql_exporter/checkpoints_req.libsonnet', import 'sql_exporter/checkpoints_timed.libsonnet', - import 'sql_exporter/compute_backpressure_throttling_seconds.libsonnet', + import 'sql_exporter/compute_backpressure_throttling_seconds_total.libsonnet', import 'sql_exporter/compute_current_lsn.libsonnet', import 'sql_exporter/compute_logical_snapshot_files.libsonnet', import 'sql_exporter/compute_logical_snapshots_bytes.libsonnet', diff --git a/compute/etc/sql_exporter/compute_backpressure_throttling_seconds.libsonnet b/compute/etc/sql_exporter/compute_backpressure_throttling_seconds_total.libsonnet similarity index 61% rename from compute/etc/sql_exporter/compute_backpressure_throttling_seconds.libsonnet rename to compute/etc/sql_exporter/compute_backpressure_throttling_seconds_total.libsonnet index 02c803cfa6..31725bd179 100644 --- a/compute/etc/sql_exporter/compute_backpressure_throttling_seconds.libsonnet +++ b/compute/etc/sql_exporter/compute_backpressure_throttling_seconds_total.libsonnet @@ -1,10 +1,10 @@ { - metric_name: 'compute_backpressure_throttling_seconds', - type: 'gauge', + metric_name: 'compute_backpressure_throttling_seconds_total', + type: 'counter', help: 'Time compute has spent throttled', key_labels: null, values: [ 'throttled', ], - query: importstr 'sql_exporter/compute_backpressure_throttling_seconds.sql', + query: importstr 'sql_exporter/compute_backpressure_throttling_seconds_total.sql', } diff --git a/compute/etc/sql_exporter/compute_backpressure_throttling_seconds.sql b/compute/etc/sql_exporter/compute_backpressure_throttling_seconds_total.sql similarity index 100% rename from compute/etc/sql_exporter/compute_backpressure_throttling_seconds.sql rename to compute/etc/sql_exporter/compute_backpressure_throttling_seconds_total.sql From 007b13b79af0fa75151a8fba58a65d3c4933942f Mon Sep 17 00:00:00 2001 From: Mikhail Kot Date: Tue, 17 Dec 2024 16:43:54 +0000 Subject: [PATCH 28/89] Don't build tests in compute image, use ninja (#10149) Don't build tests in h3 and rdkit: ~15 min speedup. Use Ninja as cmake generator where possible: ~10 min speedup. 
Clean apt cache for smaller images: around 250mb size loss for intermediate layers --- compute/compute-node.Dockerfile | 78 ++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 30 deletions(-) diff --git a/compute/compute-node.Dockerfile b/compute/compute-node.Dockerfile index 33d2a10285..1e11efeaf8 100644 --- a/compute/compute-node.Dockerfile +++ b/compute/compute-node.Dockerfile @@ -35,10 +35,12 @@ RUN case $DEBIAN_VERSION in \ ;; \ esac && \ apt update && \ - apt install --no-install-recommends -y git autoconf automake libtool build-essential bison flex libreadline-dev \ + apt install --no-install-recommends --no-install-suggests -y \ + ninja-build git autoconf automake libtool build-essential bison flex libreadline-dev \ zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget ca-certificates pkg-config libssl-dev \ libicu-dev libxslt1-dev liblz4-dev libzstd-dev zstd \ - $VERSION_INSTALLS + $VERSION_INSTALLS \ + && apt clean && rm -rf /var/lib/apt/lists/* ######################################################################################### # @@ -113,10 +115,12 @@ ARG DEBIAN_VERSION ARG PG_VERSION COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN apt update && \ - apt install --no-install-recommends -y gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \ + apt install --no-install-recommends --no-install-suggests -y \ + gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \ libboost-system-dev libboost-iostreams-dev libboost-program-options-dev libboost-timer-dev \ libcgal-dev libgdal-dev libgmp-dev libmpfr-dev libopenscenegraph-dev libprotobuf-c-dev \ - protobuf-c-compiler xsltproc + protobuf-c-compiler xsltproc \ + && apt clean && rm -rf /var/lib/apt/lists/* # Postgis 3.5.0 requires SFCGAL 1.4+ @@ -143,9 +147,9 @@ RUN case "${DEBIAN_VERSION}" in \ wget https://gitlab.com/sfcgal/SFCGAL/-/archive/v${SFCGAL_VERSION}/SFCGAL-v${SFCGAL_VERSION}.tar.gz -O SFCGAL.tar.gz && \ echo "${SFCGAL_CHECKSUM} SFCGAL.tar.gz" | sha256sum --check && \ mkdir sfcgal-src && cd sfcgal-src && tar xzf ../SFCGAL.tar.gz --strip-components=1 -C . && \ - cmake -DCMAKE_BUILD_TYPE=Release . && make -j $(getconf _NPROCESSORS_ONLN) && \ - DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \ - make clean && cp -R /sfcgal/* / + cmake -DCMAKE_BUILD_TYPE=Release -GNinja . && ninja -j $(getconf _NPROCESSORS_ONLN) && \ + DESTDIR=/sfcgal ninja install -j $(getconf _NPROCESSORS_ONLN) && \ + ninja clean && cp -R /sfcgal/* / ENV PATH="/usr/local/pgsql/bin:$PATH" @@ -213,9 +217,9 @@ RUN case "${PG_VERSION}" in \ echo "${PGROUTING_CHECKSUM} pgrouting.tar.gz" | sha256sum --check && \ mkdir pgrouting-src && cd pgrouting-src && tar xzf ../pgrouting.tar.gz --strip-components=1 -C . && \ mkdir build && cd build && \ - cmake -DCMAKE_BUILD_TYPE=Release .. && \ - make -j $(getconf _NPROCESSORS_ONLN) && \ - make -j $(getconf _NPROCESSORS_ONLN) install && \ + cmake -GNinja -DCMAKE_BUILD_TYPE=Release .. 
&& \ + ninja -j $(getconf _NPROCESSORS_ONLN) && \ + ninja -j $(getconf _NPROCESSORS_ONLN) install && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrouting.control && \ find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /after.txt &&\ cp /usr/local/pgsql/share/extension/pgrouting.control /extensions/postgis && \ @@ -235,7 +239,9 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY compute/patches/plv8-3.1.10.patch /plv8-3.1.10.patch RUN apt update && \ - apt install --no-install-recommends -y ninja-build python3-dev libncurses5 binutils clang + apt install --no-install-recommends --no-install-suggests -y \ + ninja-build python3-dev libncurses5 binutils clang \ + && apt clean && rm -rf /var/lib/apt/lists/* # plv8 3.2.3 supports v17 # last release v3.2.3 - Sep 7, 2024 @@ -301,9 +307,10 @@ RUN mkdir -p /h3/usr/ && \ echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \ mkdir h3-src && cd h3-src && tar xzf ../h3.tar.gz --strip-components=1 -C . && \ mkdir build && cd build && \ - cmake .. -DCMAKE_BUILD_TYPE=Release && \ - make -j $(getconf _NPROCESSORS_ONLN) && \ - DESTDIR=/h3 make install && \ + cmake .. -GNinja -DBUILD_BENCHMARKS=0 -DCMAKE_BUILD_TYPE=Release \ + -DBUILD_FUZZERS=0 -DBUILD_FILTERS=0 -DBUILD_GENERATORS=0 -DBUILD_TESTING=0 \ + && ninja -j $(getconf _NPROCESSORS_ONLN) && \ + DESTDIR=/h3 ninja install && \ cp -R /h3/usr / && \ rm -rf build @@ -650,14 +657,15 @@ FROM build-deps AS rdkit-pg-build ARG PG_VERSION COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ -RUN apt-get update && \ - apt-get install --no-install-recommends -y \ +RUN apt update && \ + apt install --no-install-recommends --no-install-suggests -y \ libboost-iostreams1.74-dev \ libboost-regex1.74-dev \ libboost-serialization1.74-dev \ libboost-system1.74-dev \ libeigen3-dev \ - libboost-all-dev + libboost-all-dev \ + && apt clean && rm -rf /var/lib/apt/lists/* # rdkit Release_2024_09_1 supports v17 # last release Release_2024_09_1 - Sep 27, 2024 @@ -693,6 +701,8 @@ RUN case "${PG_VERSION}" in \ -D RDK_BUILD_MOLINTERCHANGE_SUPPORT=OFF \ -D RDK_BUILD_YAEHMOP_SUPPORT=OFF \ -D RDK_BUILD_STRUCTCHECKER_SUPPORT=OFF \ + -D RDK_TEST_MULTITHREADED=OFF \ + -D RDK_BUILD_CPP_TESTS=OFF \ -D RDK_USE_URF=OFF \ -D RDK_BUILD_PGSQL=ON \ -D RDK_PGSQL_STATIC=ON \ @@ -704,9 +714,10 @@ RUN case "${PG_VERSION}" in \ -D RDK_INSTALL_COMIC_FONTS=OFF \ -D RDK_BUILD_FREETYPE_SUPPORT=OFF \ -D CMAKE_BUILD_TYPE=Release \ + -GNinja \ . 
&& \ - make -j $(getconf _NPROCESSORS_ONLN) && \ - make -j $(getconf _NPROCESSORS_ONLN) install && \ + ninja -j $(getconf _NPROCESSORS_ONLN) && \ + ninja -j $(getconf _NPROCESSORS_ONLN) install && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/rdkit.control ######################################################################################### @@ -849,8 +860,9 @@ FROM build-deps AS rust-extensions-build ARG PG_VERSION COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ -RUN apt-get update && \ - apt-get install --no-install-recommends -y curl libclang-dev && \ +RUN apt update && \ + apt install --no-install-recommends --no-install-suggests -y curl libclang-dev && \ + apt clean && rm -rf /var/lib/apt/lists/* && \ useradd -ms /bin/bash nonroot -b /home ENV HOME=/home/nonroot @@ -885,8 +897,9 @@ FROM build-deps AS rust-extensions-build-pgrx12 ARG PG_VERSION COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ -RUN apt-get update && \ - apt-get install --no-install-recommends -y curl libclang-dev && \ +RUN apt update && \ + apt install --no-install-recommends --no-install-suggests -y curl libclang-dev && \ + apt clean && rm -rf /var/lib/apt/lists/* && \ useradd -ms /bin/bash nonroot -b /home ENV HOME=/home/nonroot @@ -914,18 +927,22 @@ FROM rust-extensions-build-pgrx12 AS pg-onnx-build # cmake 3.26 or higher is required, so installing it using pip (bullseye-backports has cmake 3.25). # Install it using virtual environment, because Python 3.11 (the default version on Debian 12 (Bookworm)) complains otherwise -RUN apt-get update && apt-get install -y python3 python3-pip python3-venv && \ +RUN apt update && apt install --no-install-recommends --no-install-suggests -y \ + python3 python3-pip python3-venv && \ + apt clean && rm -rf /var/lib/apt/lists/* && \ python3 -m venv venv && \ . venv/bin/activate && \ python3 -m pip install cmake==3.30.5 && \ wget https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.18.1.tar.gz -O onnxruntime.tar.gz && \ mkdir onnxruntime-src && cd onnxruntime-src && tar xzf ../onnxruntime.tar.gz --strip-components=1 -C . && \ - ./build.sh --config Release --parallel --skip_submodule_sync --skip_tests --allow_running_as_root + ./build.sh --config Release --parallel --cmake_generator Ninja \ + --skip_submodule_sync --skip_tests --allow_running_as_root FROM pg-onnx-build AS pgrag-pg-build -RUN apt-get install -y protobuf-compiler && \ +RUN apt update && apt install --no-install-recommends --no-install-suggests -y protobuf-compiler \ + && apt clean && rm -rf /var/lib/apt/lists/* && \ wget https://github.com/neondatabase-labs/pgrag/archive/refs/tags/v0.0.0.tar.gz -O pgrag.tar.gz && \ echo "2cbe394c1e74fc8bcad9b52d5fbbfb783aef834ca3ce44626cfd770573700bb4 pgrag.tar.gz" | sha256sum --check && \ mkdir pgrag-src && cd pgrag-src && tar xzf ../pgrag.tar.gz --strip-components=1 -C . 
&& \ @@ -1279,8 +1296,8 @@ COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/fast_ FROM debian:$DEBIAN_FLAVOR AS pgbouncer RUN set -e \ - && apt-get update \ - && apt-get install --no-install-recommends -y \ + && apt update \ + && apt install --no-install-suggests --no-install-recommends -y \ build-essential \ git \ ca-certificates \ @@ -1288,7 +1305,8 @@ RUN set -e \ automake \ libevent-dev \ libtool \ - pkg-config + pkg-config \ + && apt clean && rm -rf /var/lib/apt/lists/* # Use `dist_man_MANS=` to skip manpage generation (which requires python3/pandoc) ENV PGBOUNCER_TAG=pgbouncer_1_22_1 @@ -1519,7 +1537,7 @@ RUN apt update && \ procps \ ca-certificates \ $VERSION_INSTALLS && \ - rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ + apt clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 # s5cmd 2.2.2 from https://github.com/peak/s5cmd/releases/tag/v2.2.2 From a55853f67fad71c06fd0b856ada7cf8fea4306bd Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Tue, 17 Dec 2024 17:51:58 +0100 Subject: [PATCH 29/89] utils: symbolize heap profiles (#10153) ## Problem Jemalloc heap profiles aren't symbolized. This is inconvenient, and doesn't work with Grafana Cloud Profiles. Resolves #9964. ## Summary of changes Symbolize the heap profiles in-process, and strip unnecessary cruft. This uses about 100 MB additional memory to cache the DWARF information, but I believe this is already the case with CPU profiles, which use the same library for symbolization. With cached DWARF information, the symbolization CPU overhead is negligible. Example profiles: * [pageserver.pb.gz](https://github.com/user-attachments/files/18141395/pageserver.pb.gz) * [safekeeper.pb.gz](https://github.com/user-attachments/files/18141396/safekeeper.pb.gz) --- Cargo.lock | 105 +++++++++++------- Cargo.toml | 1 + libs/utils/Cargo.toml | 5 +- libs/utils/src/http/endpoint.rs | 46 ++++++-- libs/utils/src/lib.rs | 2 + libs/utils/src/pprof.rs | 190 ++++++++++++++++++++++++++++++++ 6 files changed, 298 insertions(+), 51 deletions(-) create mode 100644 libs/utils/src/pprof.rs diff --git a/Cargo.lock b/Cargo.lock index b9b89efa02..d9ac167042 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10,9 +10,9 @@ checksum = "8b5ace29ee3216de37c0546865ad08edef58b0f9e76838ed8959a84a990e58c5" [[package]] name = "addr2line" -version = "0.21.0" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" dependencies = [ "gimli", ] @@ -23,6 +23,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + [[package]] name = "ahash" version = "0.8.11" @@ -871,17 +877,17 @@ dependencies = [ [[package]] name = "backtrace" -version = "0.3.69" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" dependencies = [ "addr2line", - "cc", "cfg-if", "libc", - "miniz_oxide", + "miniz_oxide 0.8.0", 
"object", "rustc-demangle", + "windows-targets 0.52.6", ] [[package]] @@ -1127,7 +1133,7 @@ dependencies = [ "num-traits", "serde", "wasm-bindgen", - "windows-targets 0.52.4", + "windows-targets 0.52.6", ] [[package]] @@ -2107,7 +2113,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743" dependencies = [ "crc32fast", - "miniz_oxide", + "miniz_oxide 0.7.1", ] [[package]] @@ -2308,9 +2314,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.28.1" +version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "git-version" @@ -3404,6 +3410,15 @@ dependencies = [ "adler", ] +[[package]] +name = "miniz_oxide" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +dependencies = [ + "adler2", +] + [[package]] name = "mio" version = "0.8.11" @@ -3638,9 +3653,9 @@ dependencies = [ [[package]] name = "object" -version = "0.32.2" +version = "0.36.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" dependencies = [ "memchr", ] @@ -5323,9 +5338,9 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] name = "rustc-hash" @@ -7219,6 +7234,7 @@ dependencies = [ "anyhow", "arc-swap", "async-compression", + "backtrace", "bincode", "byteorder", "bytes", @@ -7229,12 +7245,14 @@ dependencies = [ "criterion", "diatomic-waker", "fail", + "flate2", "futures", "git-version", "hex", "hex-literal", "humantime", "hyper 0.14.30", + "itertools 0.10.5", "jemalloc_pprof", "jsonwebtoken", "metrics", @@ -7591,7 +7609,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" dependencies = [ "windows-core", - "windows-targets 0.52.4", + "windows-targets 0.52.6", ] [[package]] @@ -7600,7 +7618,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.4", + "windows-targets 0.52.6", ] [[package]] @@ -7618,7 +7636,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.4", + "windows-targets 0.52.6", ] [[package]] @@ -7638,17 +7656,18 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.4", - "windows_aarch64_msvc 0.52.4", - "windows_i686_gnu 0.52.4", - "windows_i686_msvc 0.52.4", - 
"windows_x86_64_gnu 0.52.4", - "windows_x86_64_gnullvm 0.52.4", - "windows_x86_64_msvc 0.52.4", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", ] [[package]] @@ -7659,9 +7678,9 @@ checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" @@ -7671,9 +7690,9 @@ checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" [[package]] name = "windows_aarch64_msvc" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" @@ -7683,9 +7702,15 @@ checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" [[package]] name = "windows_i686_gnu" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" @@ -7695,9 +7720,9 @@ checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" [[package]] name = "windows_i686_msvc" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" @@ -7707,9 +7732,9 @@ checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" [[package]] name = "windows_x86_64_gnu" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" @@ -7719,9 +7744,9 @@ checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" @@ -7731,9 +7756,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" [[package]] name = "windows_x86_64_msvc" -version = "0.52.4" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" diff --git a/Cargo.toml b/Cargo.toml index 056cd5798f..885f02ba81 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,6 +52,7 @@ anyhow = { version = "1.0", features = ["backtrace"] } arc-swap = "1.6" async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] } atomic-take = "1.1.0" +backtrace = "0.3.74" flate2 = "1.0.26" async-stream = "0.3" async-trait = "0.1" diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml index 66500fb141..02bf77760a 100644 --- a/libs/utils/Cargo.toml +++ b/libs/utils/Cargo.toml @@ -15,17 +15,20 @@ arc-swap.workspace = true sentry.workspace = true async-compression.workspace = true anyhow.workspace = true +backtrace.workspace = true bincode.workspace = true bytes.workspace = true camino.workspace = true chrono.workspace = true diatomic-waker.workspace = true +flate2.workspace = true git-version.workspace = true hex = { workspace = true, features = ["serde"] } humantime.workspace = true hyper0 = { workspace = true, features = ["full"] } +itertools.workspace = true fail.workspace = true -futures = { workspace = true} +futures = { workspace = true } jemalloc_pprof.workspace = true jsonwebtoken.workspace = true nix.workspace = true diff --git a/libs/utils/src/http/endpoint.rs b/libs/utils/src/http/endpoint.rs index d975b63677..9b37b69939 100644 --- a/libs/utils/src/http/endpoint.rs +++ b/libs/utils/src/http/endpoint.rs @@ -1,15 +1,22 @@ use crate::auth::{AuthError, Claims, SwappableJwtAuth}; use crate::http::error::{api_error_handler, route_error_handler, ApiError}; use crate::http::request::{get_query_param, parse_query_param}; +use crate::pprof; +use ::pprof::protos::Message as _; +use ::pprof::ProfilerGuardBuilder; use anyhow::{anyhow, Context}; +use bytes::{Bytes, BytesMut}; use hyper::header::{HeaderName, AUTHORIZATION, CONTENT_DISPOSITION}; use hyper::http::HeaderValue; use hyper::Method; use hyper::{header::CONTENT_TYPE, Body, Request, Response}; use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder}; use once_cell::sync::Lazy; +use regex::Regex; use routerify::ext::RequestExt; use routerify::{Middleware, RequestInfo, Router, RouterBuilder}; +use tokio::sync::{mpsc, Mutex}; +use tokio_stream::wrappers::ReceiverStream; use tokio_util::io::ReaderStream; use tracing::{debug, info, info_span, warn, Instrument}; @@ -18,11 +25,6 @@ use std::io::Write as _; use std::str::FromStr; use std::time::Duration; -use bytes::{Bytes, BytesMut}; -use pprof::protos::Message as _; -use tokio::sync::{mpsc, Mutex}; -use tokio_stream::wrappers::ReceiverStream; - static SERVE_METRICS_COUNT: Lazy = Lazy::new(|| { register_int_counter!( "libmetrics_metric_handler_requests_total", @@ -365,7 +367,7 @@ pub async fn profile_cpu_handler(req: Request) -> Result, A // Take the profile. let report = tokio::task::spawn_blocking(move || { - let guard = pprof::ProfilerGuardBuilder::default() + let guard = ProfilerGuardBuilder::default() .frequency(frequency_hz) .blocklist(&["libc", "libgcc", "pthread", "vdso"]) .build()?; @@ -457,10 +459,34 @@ pub async fn profile_heap_handler(req: Request) -> Result, } Format::Pprof => { - let data = tokio::task::spawn_blocking(move || prof_ctl.dump_pprof()) - .await - .map_err(|join_err| ApiError::InternalServerError(join_err.into()))? 
- .map_err(ApiError::InternalServerError)?; + let data = tokio::task::spawn_blocking(move || { + let bytes = prof_ctl.dump_pprof()?; + // Symbolize the profile. + // TODO: consider moving this upstream to jemalloc_pprof and avoiding the + // serialization roundtrip. + static STRIP_FUNCTIONS: Lazy> = Lazy::new(|| { + // Functions to strip from profiles. If true, also remove child frames. + vec![ + (Regex::new("^__rust").unwrap(), false), + (Regex::new("^_start$").unwrap(), false), + (Regex::new("^irallocx_prof").unwrap(), true), + (Regex::new("^prof_alloc_prep").unwrap(), true), + (Regex::new("^std::rt::lang_start").unwrap(), false), + (Regex::new("^std::sys::backtrace::__rust").unwrap(), false), + ] + }); + let profile = pprof::decode(&bytes)?; + let profile = pprof::symbolize(profile)?; + let profile = pprof::strip_locations( + profile, + &["libc", "libgcc", "pthread", "vdso"], + &STRIP_FUNCTIONS, + ); + pprof::encode(&profile) + }) + .await + .map_err(|join_err| ApiError::InternalServerError(join_err.into()))? + .map_err(ApiError::InternalServerError)?; Response::builder() .status(200) .header(CONTENT_TYPE, "application/octet-stream") diff --git a/libs/utils/src/lib.rs b/libs/utils/src/lib.rs index bccd0e0488..2c56dd750f 100644 --- a/libs/utils/src/lib.rs +++ b/libs/utils/src/lib.rs @@ -96,6 +96,8 @@ pub mod circuit_breaker; pub mod try_rcu; +pub mod pprof; + // Re-export used in macro. Avoids adding git-version as dep in target crates. #[doc(hidden)] pub use git_version; diff --git a/libs/utils/src/pprof.rs b/libs/utils/src/pprof.rs new file mode 100644 index 0000000000..90910897bf --- /dev/null +++ b/libs/utils/src/pprof.rs @@ -0,0 +1,190 @@ +use flate2::write::{GzDecoder, GzEncoder}; +use flate2::Compression; +use itertools::Itertools as _; +use once_cell::sync::Lazy; +use pprof::protos::{Function, Line, Message as _, Profile}; +use regex::Regex; + +use std::borrow::Cow; +use std::collections::{HashMap, HashSet}; +use std::ffi::c_void; +use std::io::Write as _; + +/// Decodes a gzip-compressed Protobuf-encoded pprof profile. +pub fn decode(bytes: &[u8]) -> anyhow::Result { + let mut gz = GzDecoder::new(Vec::new()); + gz.write_all(bytes)?; + Ok(Profile::parse_from_bytes(&gz.finish()?)?) +} + +/// Encodes a pprof profile as gzip-compressed Protobuf. +pub fn encode(profile: &Profile) -> anyhow::Result> { + let mut gz = GzEncoder::new(Vec::new(), Compression::default()); + profile.write_to_writer(&mut gz)?; + Ok(gz.finish()?) +} + +/// Symbolizes a pprof profile using the current binary. +pub fn symbolize(mut profile: Profile) -> anyhow::Result { + if !profile.function.is_empty() { + return Ok(profile); // already symbolized + } + + // Collect function names. + let mut functions: HashMap = HashMap::new(); + let mut strings: HashMap = profile + .string_table + .into_iter() + .enumerate() + .map(|(i, s)| (s, i as i64)) + .collect(); + + // Helper to look up or register a string. + let mut string_id = |s: &str| -> i64 { + // Don't use .entry() to avoid unnecessary allocations. + if let Some(id) = strings.get(s) { + return *id; + } + let id = strings.len() as i64; + strings.insert(s.to_string(), id); + id + }; + + for loc in &mut profile.location { + if !loc.line.is_empty() { + continue; + } + + // Resolve the line and function for each location. + backtrace::resolve(loc.address as *mut c_void, |symbol| { + let Some(symname) = symbol.name() else { + return; + }; + let mut name = symname.to_string(); + + // Strip the Rust monomorphization suffix from the symbol name. 
+ static SUFFIX_REGEX: Lazy = + Lazy::new(|| Regex::new("::h[0-9a-f]{16}$").expect("invalid regex")); + if let Some(m) = SUFFIX_REGEX.find(&name) { + name.truncate(m.start()); + } + + let function_id = match functions.get(&name) { + Some(function) => function.id, + None => { + let id = functions.len() as u64 + 1; + let system_name = String::from_utf8_lossy(symname.as_bytes()); + let filename = symbol + .filename() + .map(|path| path.to_string_lossy()) + .unwrap_or(Cow::Borrowed("")); + let function = Function { + id, + name: string_id(&name), + system_name: string_id(&system_name), + filename: string_id(&filename), + ..Default::default() + }; + functions.insert(name, function); + id + } + }; + loc.line.push(Line { + function_id, + line: symbol.lineno().unwrap_or(0) as i64, + ..Default::default() + }); + }); + } + + // Store the resolved functions, and mark the mapping as resolved. + profile.function = functions.into_values().sorted_by_key(|f| f.id).collect(); + profile.string_table = strings + .into_iter() + .sorted_by_key(|(_, i)| *i) + .map(|(s, _)| s) + .collect(); + + for mapping in &mut profile.mapping { + mapping.has_functions = true; + mapping.has_filenames = true; + } + + Ok(profile) +} + +/// Strips locations (stack frames) matching the given mappings (substring) or function names +/// (regex). The function bool specifies whether child frames should be stripped as well. +/// +/// The string definitions are left behind in the profile for simplicity, to avoid rewriting all +/// string references. +pub fn strip_locations( + mut profile: Profile, + mappings: &[&str], + functions: &[(Regex, bool)], +) -> Profile { + // Strip mappings. + let mut strip_mappings: HashSet = HashSet::new(); + + profile.mapping.retain(|mapping| { + let Some(name) = profile.string_table.get(mapping.filename as usize) else { + return true; + }; + if mappings.iter().any(|substr| name.contains(substr)) { + strip_mappings.insert(mapping.id); + return false; + } + true + }); + + // Strip functions. + let mut strip_functions: HashMap = HashMap::new(); + + profile.function.retain(|function| { + let Some(name) = profile.string_table.get(function.name as usize) else { + return true; + }; + for (regex, strip_children) in functions { + if regex.is_match(name) { + strip_functions.insert(function.id, *strip_children); + return false; + } + } + true + }); + + // Strip locations. The bool specifies whether child frames should be stripped too. + let mut strip_locations: HashMap = HashMap::new(); + + profile.location.retain(|location| { + for line in &location.line { + if let Some(strip_children) = strip_functions.get(&line.function_id) { + strip_locations.insert(location.id, *strip_children); + return false; + } + } + if strip_mappings.contains(&location.mapping_id) { + strip_locations.insert(location.id, false); + return false; + } + true + }); + + // Strip sample locations. + for sample in &mut profile.sample { + // First, find the uppermost function with child removal and truncate the stack. + if let Some(truncate) = sample + .location_id + .iter() + .rposition(|id| strip_locations.get(id) == Some(&true)) + { + sample.location_id.drain(..=truncate); + } + // Next, strip any individual frames without child removal. 
+ sample + .location_id + .retain(|id| !strip_locations.contains_key(id)); + } + + profile +} From 7dddbb9570ca52d174b7d53ca2495fb272796a7f Mon Sep 17 00:00:00 2001 From: Tristan Partin Date: Tue, 17 Dec 2024 12:36:55 -0600 Subject: [PATCH 30/89] Add pg_repack extension (#10100) Our solutions engineers and some customers would like to have this extension available. Link: https://github.com/neondatabase/cloud/issues/18890 Signed-off-by: Tristan Partin --- compute/compute-node.Dockerfile | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/compute/compute-node.Dockerfile b/compute/compute-node.Dockerfile index 1e11efeaf8..9f1f3b7343 100644 --- a/compute/compute-node.Dockerfile +++ b/compute/compute-node.Dockerfile @@ -1185,6 +1185,25 @@ RUN case "${PG_VERSION}" in \ make BUILD_TYPE=release -j $(getconf _NPROCESSORS_ONLN) install && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_mooncake.control +######################################################################################### +# +# Layer "pg_repack" +# compile pg_repack extension +# +######################################################################################### + +FROM build-deps AS pg-repack-build +ARG PG_VERSION +COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ + +ENV PATH="/usr/local/pgsql/bin/:$PATH" + +RUN wget https://github.com/reorg/pg_repack/archive/refs/tags/ver_1.5.2.tar.gz -O pg_repack.tar.gz && \ + echo '4516cad42251ed3ad53ff619733004db47d5755acac83f75924cd94d1c4fb681 pg_repack.tar.gz' | sha256sum --check && \ + mkdir pg_repack-src && cd pg_repack-src && tar xzf ../pg_repack.tar.gz --strip-components=1 -C . && \ + make -j $(getconf _NPROCESSORS_ONLN) && \ + make -j $(getconf _NPROCESSORS_ONLN) install + ######################################################################################### # # Layer "neon-pg-ext-build" @@ -1230,6 +1249,7 @@ COPY --from=pg-anon-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-ivm-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-partman-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-mooncake-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg-repack-build /usr/local/pgsql/ /usr/local/pgsql/ COPY pgxn/ pgxn/ RUN make -j $(getconf _NPROCESSORS_ONLN) \ From 93e958341fc47317a0b430b40ae4906da7294d2f Mon Sep 17 00:00:00 2001 From: Ivan Efremov Date: Tue, 17 Dec 2024 21:26:54 +0200 Subject: [PATCH 31/89] [proxy]: Use TLS for cancellation queries (#10152) ## Problem pg_sni_router assumes that all the streams are upgradable to TLS. Cancellation requests were declined because of using NoTls config. ## Summary of changes Provide TLS client config for cancellation requests. Fixes [#21789](https://github.com/orgs/neondatabase/projects/65/views/1?pane=issue&itemId=90911361&issue=neondatabase%7Ccloud%7C21789) --- proxy/src/bin/pg_sni_router.rs | 2 +- proxy/src/cancellation.rs | 61 ++++++++++++++++++++++++++++++++-- proxy/src/compute.rs | 6 ++-- 3 files changed, 63 insertions(+), 6 deletions(-) diff --git a/proxy/src/bin/pg_sni_router.rs b/proxy/src/bin/pg_sni_router.rs index 623a0fd3b2..9538384b9e 100644 --- a/proxy/src/bin/pg_sni_router.rs +++ b/proxy/src/bin/pg_sni_router.rs @@ -229,7 +229,7 @@ async fn ssl_handshake( let (raw, read_buf) = stream.into_inner(); // TODO: Normally, client doesn't send any data before - // server says TLS handshake is ok and read_buf is empy. + // server says TLS handshake is ok and read_buf is empty. 
// However, you could imagine pipelining of postgres // SSLRequest + TLS ClientHello in one hunk similar to // pipelining in our node js driver. We should probably diff --git a/proxy/src/cancellation.rs b/proxy/src/cancellation.rs index dd3edd6abc..a58e3961da 100644 --- a/proxy/src/cancellation.rs +++ b/proxy/src/cancellation.rs @@ -3,8 +3,10 @@ use std::sync::Arc; use dashmap::DashMap; use ipnet::{IpNet, Ipv4Net, Ipv6Net}; -use postgres_client::{CancelToken, NoTls}; +use once_cell::sync::OnceCell; +use postgres_client::{tls::MakeTlsConnect, CancelToken}; use pq_proto::CancelKeyData; +use rustls::crypto::ring; use thiserror::Error; use tokio::net::TcpStream; use tokio::sync::Mutex; @@ -20,6 +22,9 @@ use crate::redis::cancellation_publisher::{ CancellationPublisher, CancellationPublisherMut, RedisPublisherClient, }; +use crate::compute::{load_certs, AcceptEverythingVerifier}; +use crate::postgres_rustls::MakeRustlsConnect; + pub type CancelMap = Arc>>; pub type CancellationHandlerMain = CancellationHandler>>>; pub(crate) type CancellationHandlerMainInternal = Option>>; @@ -174,7 +179,10 @@ impl CancellationHandler

{ source: self.from, kind: crate::metrics::CancellationOutcome::Found, }); - info!("cancelling query per user's request using key {key}"); + info!( + "cancelling query per user's request using key {key}, hostname {}, address: {}", + cancel_closure.hostname, cancel_closure.socket_addr + ); cancel_closure.try_cancel_query().await } @@ -221,6 +229,8 @@ impl CancellationHandler>>> { } } +static TLS_ROOTS: OnceCell> = OnceCell::new(); + /// This should've been a [`std::future::Future`], but /// it's impossible to name a type of an unboxed future /// (we'd need something like `#![feature(type_alias_impl_trait)]`). @@ -229,6 +239,8 @@ pub struct CancelClosure { socket_addr: SocketAddr, cancel_token: CancelToken, ip_allowlist: Vec, + hostname: String, // for pg_sni router + allow_self_signed_compute: bool, } impl CancelClosure { @@ -236,17 +248,60 @@ impl CancelClosure { socket_addr: SocketAddr, cancel_token: CancelToken, ip_allowlist: Vec, + hostname: String, + allow_self_signed_compute: bool, ) -> Self { Self { socket_addr, cancel_token, ip_allowlist, + hostname, + allow_self_signed_compute, } } /// Cancels the query running on user's compute node. pub(crate) async fn try_cancel_query(self) -> Result<(), CancelError> { let socket = TcpStream::connect(self.socket_addr).await?; - self.cancel_token.cancel_query_raw(socket, NoTls).await?; + + let client_config = if self.allow_self_signed_compute { + // Allow all certificates for creating the connection. Used only for tests + let verifier = Arc::new(AcceptEverythingVerifier); + rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider())) + .with_safe_default_protocol_versions() + .expect("ring should support the default protocol versions") + .dangerous() + .with_custom_certificate_verifier(verifier) + } else { + let root_store = TLS_ROOTS + .get_or_try_init(load_certs) + .map_err(|_e| { + CancelError::IO(std::io::Error::new( + std::io::ErrorKind::Other, + "TLS root store initialization failed".to_string(), + )) + })? 
+ .clone(); + rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider())) + .with_safe_default_protocol_versions() + .expect("ring should support the default protocol versions") + .with_root_certificates(root_store) + }; + + let client_config = client_config.with_no_client_auth(); + + let mut mk_tls = crate::postgres_rustls::MakeRustlsConnect::new(client_config); + let tls = >::make_tls_connect( + &mut mk_tls, + &self.hostname, + ) + .map_err(|e| { + CancelError::IO(std::io::Error::new( + std::io::ErrorKind::Other, + e.to_string(), + )) + })?; + + self.cancel_token.cancel_query_raw(socket, tls).await?; debug!("query was cancelled"); Ok(()) } diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index 4113b5bb80..42df5ff5e3 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -319,6 +319,8 @@ impl ConnCfg { secret_key, }, vec![], + host.to_string(), + allow_self_signed_compute, ); let connection = PostgresConnection { @@ -350,7 +352,7 @@ fn filtered_options(options: &str) -> Option { Some(options) } -fn load_certs() -> Result, Vec> { +pub(crate) fn load_certs() -> Result, Vec> { let der_certs = rustls_native_certs::load_native_certs(); if !der_certs.errors.is_empty() { @@ -364,7 +366,7 @@ fn load_certs() -> Result, Vec> = OnceCell::new(); #[derive(Debug)] -struct AcceptEverythingVerifier; +pub(crate) struct AcceptEverythingVerifier; impl ServerCertVerifier for AcceptEverythingVerifier { fn supported_verify_schemes(&self) -> Vec { use rustls::SignatureScheme; From fd230227f2fd370821610f91601c9b02dda23e0d Mon Sep 17 00:00:00 2001 From: John Spray Date: Tue, 17 Dec 2024 20:04:09 +0000 Subject: [PATCH 32/89] storcon: include preferred AZ in compute notifications (#9953) ## Problem It is unreliable for the control plane to infer the AZ for computes from where the tenant is currently attached, because if a tenant happens to be in a degraded state or a release is ongoing while a compute starts, then the tenant's attached AZ can be a different one to where it will run long-term, and the control plane doesn't check back later to restart the compute. 
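Concretely, the intent is for the notification payload itself to carry the tenant's preferred AZ. The sketch below is illustrative only: the tenant and node IDs are invented, but the field set mirrors the updated `ComputeHookNotifyRequest` structure and the test expectations further down in this patch.

```python
# Sketch of a compute-hook notification body after this change.
# The tenant_id and node_id values here are hypothetical.
expected_notification = {
    "tenant_id": "1f359dd625e519a1a4e8d7509690f6fc",  # example tenant id
    "stripe_size": 32768,  # None for an unsharded tenant
    "shards": [
        {"node_id": 1, "shard_number": 0},
        {"node_id": 2, "shard_number": 1},
    ],
    # New in this patch: the tenant's preferred AZ, passed through to the control plane.
    "preferred_az": "us-east-2a",
}
```
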
This can land in parallel with https://github.com/neondatabase/neon/pull/9947 ## Summary of changes - Thread through the preferred AZ into the compute hook code via the reconciler - Include the preferred AZ in the body of compute hook notifications --- storage_controller/src/compute_hook.rs | 138 +++++++++++++----- storage_controller/src/reconciler.rs | 15 +- storage_controller/src/service.rs | 25 +++- storage_controller/src/tenant_shard.rs | 1 + test_runner/fixtures/neon_fixtures.py | 5 +- test_runner/regress/test_sharding.py | 3 + .../regress/test_storage_controller.py | 5 + 7 files changed, 139 insertions(+), 53 deletions(-) diff --git a/storage_controller/src/compute_hook.rs b/storage_controller/src/compute_hook.rs index 2b2ece3f02..69db48f8d1 100644 --- a/storage_controller/src/compute_hook.rs +++ b/storage_controller/src/compute_hook.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::error::Error as _; use std::sync::Arc; use std::{collections::HashMap, time::Duration}; @@ -6,6 +7,7 @@ use control_plane::endpoint::{ComputeControlPlane, EndpointStatus}; use control_plane::local_env::LocalEnv; use futures::StreamExt; use hyper::StatusCode; +use pageserver_api::controller_api::AvailabilityZone; use pageserver_api::shard::{ShardCount, ShardNumber, ShardStripeSize, TenantShardId}; use postgres_connection::parse_host_port; use serde::{Deserialize, Serialize}; @@ -28,6 +30,9 @@ struct UnshardedComputeHookTenant { // Which node is this tenant attached to node_id: NodeId, + // The tenant's preferred AZ, so that we may pass this on to the control plane + preferred_az: Option, + // Must hold this lock to send a notification. send_lock: Arc>>, } @@ -36,6 +41,9 @@ struct ShardedComputeHookTenant { shard_count: ShardCount, shards: Vec<(ShardNumber, NodeId)>, + // The tenant's preferred AZ, so that we may pass this on to the control plane + preferred_az: Option, + // Must hold this lock to send a notification. The contents represent // the last successfully sent notification, and are used to coalesce multiple // updates by only sending when there is a chance since our last successful send. @@ -64,17 +72,24 @@ enum ComputeHookTenant { impl ComputeHookTenant { /// Construct with at least one shard's information - fn new(tenant_shard_id: TenantShardId, stripe_size: ShardStripeSize, node_id: NodeId) -> Self { + fn new( + tenant_shard_id: TenantShardId, + stripe_size: ShardStripeSize, + preferred_az: Option, + node_id: NodeId, + ) -> Self { if tenant_shard_id.shard_count.count() > 1 { Self::Sharded(ShardedComputeHookTenant { shards: vec![(tenant_shard_id.shard_number, node_id)], stripe_size, shard_count: tenant_shard_id.shard_count, + preferred_az, send_lock: Arc::default(), }) } else { Self::Unsharded(UnshardedComputeHookTenant { node_id, + preferred_az, send_lock: Arc::default(), }) } @@ -120,15 +135,20 @@ impl ComputeHookTenant { /// Set one shard's location. If stripe size or shard count have changed, Self is reset /// and drops existing content. 
- fn update( - &mut self, - tenant_shard_id: TenantShardId, - stripe_size: ShardStripeSize, - node_id: NodeId, - ) { + fn update(&mut self, shard_update: ShardUpdate) { + let tenant_shard_id = shard_update.tenant_shard_id; + let node_id = shard_update.node_id; + let stripe_size = shard_update.stripe_size; + let preferred_az = shard_update.preferred_az; + match self { Self::Unsharded(unsharded_tenant) if tenant_shard_id.shard_count.count() == 1 => { - unsharded_tenant.node_id = node_id + unsharded_tenant.node_id = node_id; + if unsharded_tenant.preferred_az.as_ref() + != preferred_az.as_ref().map(|az| az.as_ref()) + { + unsharded_tenant.preferred_az = preferred_az.map(|az| az.as_ref().clone()); + } } Self::Sharded(sharded_tenant) if sharded_tenant.stripe_size == stripe_size @@ -146,10 +166,21 @@ impl ComputeHookTenant { .push((tenant_shard_id.shard_number, node_id)); sharded_tenant.shards.sort_by_key(|s| s.0) } + + if sharded_tenant.preferred_az.as_ref() + != preferred_az.as_ref().map(|az| az.as_ref()) + { + sharded_tenant.preferred_az = preferred_az.map(|az| az.as_ref().clone()); + } } _ => { // Shard count changed: reset struct. - *self = Self::new(tenant_shard_id, stripe_size, node_id); + *self = Self::new( + tenant_shard_id, + stripe_size, + preferred_az.map(|az| az.into_owned()), + node_id, + ); } } } @@ -165,6 +196,7 @@ struct ComputeHookNotifyRequestShard { #[derive(Serialize, Deserialize, Debug, Eq, PartialEq)] struct ComputeHookNotifyRequest { tenant_id: TenantId, + preferred_az: Option, stripe_size: Option, shards: Vec, } @@ -238,6 +270,10 @@ impl ComputeHookTenant { node_id: unsharded_tenant.node_id, }], stripe_size: None, + preferred_az: unsharded_tenant + .preferred_az + .as_ref() + .map(|az| az.0.clone()), }), Self::Sharded(sharded_tenant) if sharded_tenant.shards.len() == sharded_tenant.shard_count.count() as usize => @@ -253,6 +289,7 @@ impl ComputeHookTenant { }) .collect(), stripe_size: Some(sharded_tenant.stripe_size), + preferred_az: sharded_tenant.preferred_az.as_ref().map(|az| az.0.clone()), }) } Self::Sharded(sharded_tenant) => { @@ -313,6 +350,17 @@ pub(super) struct ComputeHook { client: reqwest::Client, } +/// Callers may give us a list of these when asking us to send a bulk batch +/// of notifications in the background. This is a 'notification' in the sense of +/// other code notifying us of a shard's status, rather than being the final notification +/// that we send upwards to the control plane for the whole tenant. 
+pub(crate) struct ShardUpdate<'a> { + pub(crate) tenant_shard_id: TenantShardId, + pub(crate) node_id: NodeId, + pub(crate) stripe_size: ShardStripeSize, + pub(crate) preferred_az: Option>, +} + impl ComputeHook { pub(super) fn new(config: Config) -> Self { let authorization_header = config @@ -363,6 +411,7 @@ impl ComputeHook { tenant_id, shards, stripe_size, + preferred_az: _preferred_az, } = reconfigure_request; let compute_pageservers = shards @@ -503,24 +552,30 @@ impl ComputeHook { } /// Synchronous phase: update the per-tenant state for the next intended notification - fn notify_prepare( - &self, - tenant_shard_id: TenantShardId, - node_id: NodeId, - stripe_size: ShardStripeSize, - ) -> MaybeSendResult { + fn notify_prepare(&self, shard_update: ShardUpdate) -> MaybeSendResult { let mut state_locked = self.state.lock().unwrap(); use std::collections::hash_map::Entry; + let tenant_shard_id = shard_update.tenant_shard_id; + let tenant = match state_locked.entry(tenant_shard_id.tenant_id) { - Entry::Vacant(e) => e.insert(ComputeHookTenant::new( - tenant_shard_id, - stripe_size, - node_id, - )), + Entry::Vacant(e) => { + let ShardUpdate { + tenant_shard_id, + node_id, + stripe_size, + preferred_az, + } = shard_update; + e.insert(ComputeHookTenant::new( + tenant_shard_id, + stripe_size, + preferred_az.map(|az| az.into_owned()), + node_id, + )) + } Entry::Occupied(e) => { let tenant = e.into_mut(); - tenant.update(tenant_shard_id, stripe_size, node_id); + tenant.update(shard_update); tenant } }; @@ -608,13 +663,14 @@ impl ComputeHook { /// if something failed. pub(super) fn notify_background( self: &Arc, - notifications: Vec<(TenantShardId, NodeId, ShardStripeSize)>, + notifications: Vec, result_tx: tokio::sync::mpsc::Sender>, cancel: &CancellationToken, ) { let mut maybe_sends = Vec::new(); - for (tenant_shard_id, node_id, stripe_size) in notifications { - let maybe_send_result = self.notify_prepare(tenant_shard_id, node_id, stripe_size); + for shard_update in notifications { + let tenant_shard_id = shard_update.tenant_shard_id; + let maybe_send_result = self.notify_prepare(shard_update); maybe_sends.push((tenant_shard_id, maybe_send_result)) } @@ -678,15 +734,14 @@ impl ComputeHook { /// periods, but we don't retry forever. The **caller** is responsible for handling failures and /// ensuring that they eventually call again to ensure that the compute is eventually notified of /// the proper pageserver nodes for a tenant. - #[tracing::instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), node_id))] - pub(super) async fn notify( + #[tracing::instrument(skip_all, fields(tenant_id=%shard_update.tenant_shard_id.tenant_id, shard_id=%shard_update.tenant_shard_id.shard_slug(), node_id))] + pub(super) async fn notify<'a>( &self, - tenant_shard_id: TenantShardId, - node_id: NodeId, - stripe_size: ShardStripeSize, + shard_update: ShardUpdate<'a>, cancel: &CancellationToken, ) -> Result<(), NotifyError> { - let maybe_send_result = self.notify_prepare(tenant_shard_id, node_id, stripe_size); + let tenant_shard_id = shard_update.tenant_shard_id; + let maybe_send_result = self.notify_prepare(shard_update); self.notify_execute(maybe_send_result, tenant_shard_id, cancel) .await } @@ -739,6 +794,7 @@ pub(crate) mod tests { shard_number: ShardNumber(0), }, ShardStripeSize(12345), + None, NodeId(1), ); @@ -765,30 +821,32 @@ pub(crate) mod tests { // Writing the first shard of a multi-sharded situation (i.e. 
in a split) // resets the tenant state and puts it in an non-notifying state (need to // see all shards) - tenant_state.update( - TenantShardId { + tenant_state.update(ShardUpdate { + tenant_shard_id: TenantShardId { tenant_id, shard_count: ShardCount::new(2), shard_number: ShardNumber(1), }, - ShardStripeSize(32768), - NodeId(1), - ); + stripe_size: ShardStripeSize(32768), + preferred_az: None, + node_id: NodeId(1), + }); assert!(matches!( tenant_state.maybe_send(tenant_id, None), MaybeSendResult::Noop )); // Writing the second shard makes it ready to notify - tenant_state.update( - TenantShardId { + tenant_state.update(ShardUpdate { + tenant_shard_id: TenantShardId { tenant_id, shard_count: ShardCount::new(2), shard_number: ShardNumber(0), }, - ShardStripeSize(32768), - NodeId(1), - ); + stripe_size: ShardStripeSize(32768), + preferred_az: None, + node_id: NodeId(1), + }); let send_result = tenant_state.maybe_send(tenant_id, None); let MaybeSendResult::Transmit((request, mut guard)) = send_result else { diff --git a/storage_controller/src/reconciler.rs b/storage_controller/src/reconciler.rs index 3ad386a95b..475f91eff4 100644 --- a/storage_controller/src/reconciler.rs +++ b/storage_controller/src/reconciler.rs @@ -1,13 +1,14 @@ use crate::pageserver_client::PageserverClient; use crate::persistence::Persistence; -use crate::service; -use pageserver_api::controller_api::PlacementPolicy; +use crate::{compute_hook, service}; +use pageserver_api::controller_api::{AvailabilityZone, PlacementPolicy}; use pageserver_api::models::{ LocationConfig, LocationConfigMode, LocationConfigSecondary, TenantConfig, }; use pageserver_api::shard::{ShardIdentity, TenantShardId}; use pageserver_client::mgmt_api; use reqwest::StatusCode; +use std::borrow::Cow; use std::collections::HashMap; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -45,6 +46,7 @@ pub(super) struct Reconciler { pub(crate) reconciler_config: ReconcilerConfig, pub(crate) config: TenantConfig, + pub(crate) preferred_az: Option, /// Observed state from the point of view of the reconciler. /// This gets updated as the reconciliation makes progress. @@ -834,9 +836,12 @@ impl Reconciler { let result = self .compute_hook .notify( - self.tenant_shard_id, - node.get_id(), - self.shard.stripe_size, + compute_hook::ShardUpdate { + tenant_shard_id: self.tenant_shard_id, + node_id: node.get_id(), + stripe_size: self.shard.stripe_size, + preferred_az: self.preferred_az.as_ref().map(Cow::Borrowed), + }, &self.cancel, ) .await; diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index a89e4741f6..42b50835f8 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -18,7 +18,7 @@ use crate::{ background_node_operations::{ Drain, Fill, Operation, OperationError, OperationHandler, MAX_RECONCILES_PER_OPERATION, }, - compute_hook::NotifyError, + compute_hook::{self, NotifyError}, drain_utils::{self, TenantShardDrain, TenantShardIterator}, id_lock_map::{trace_exclusive_lock, trace_shared_lock, IdLockMap, TracingExclusiveGuard}, leadership::Leadership, @@ -656,11 +656,14 @@ impl Service { // emit a compute notification for this. In the case where our observed state does not // yet match our intent, we will eventually reconcile, and that will emit a compute notification. 
if let Some(attached_at) = tenant_shard.stably_attached() { - compute_notifications.push(( - *tenant_shard_id, - attached_at, - tenant_shard.shard.stripe_size, - )); + compute_notifications.push(compute_hook::ShardUpdate { + tenant_shard_id: *tenant_shard_id, + node_id: attached_at, + stripe_size: tenant_shard.shard.stripe_size, + preferred_az: tenant_shard + .preferred_az() + .map(|az| Cow::Owned(az.clone())), + }); } } } @@ -4786,7 +4789,15 @@ impl Service { for (child_id, child_ps, stripe_size) in child_locations { if let Err(e) = self .compute_hook - .notify(child_id, child_ps, stripe_size, &self.cancel) + .notify( + compute_hook::ShardUpdate { + tenant_shard_id: child_id, + node_id: child_ps, + stripe_size, + preferred_az: preferred_az_id.as_ref().map(Cow::Borrowed), + }, + &self.cancel, + ) .await { tracing::warn!("Failed to update compute of {}->{} during split, proceeding anyway to complete split ({e})", diff --git a/storage_controller/src/tenant_shard.rs b/storage_controller/src/tenant_shard.rs index f1b921646f..cba579e8a7 100644 --- a/storage_controller/src/tenant_shard.rs +++ b/storage_controller/src/tenant_shard.rs @@ -1198,6 +1198,7 @@ impl TenantShard { detach, reconciler_config, config: self.config.clone(), + preferred_az: self.preferred_az_id.clone(), observed: self.observed.clone(), original_observed: self.observed.clone(), compute_hook: compute_hook.clone(), diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 13ada1361e..2553a0c99a 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -134,6 +134,9 @@ DEFAULT_BRANCH_NAME: str = "main" BASE_PORT: int = 15000 +# By default we create pageservers with this phony AZ +DEFAULT_AZ_ID: str = "us-east-2a" + @pytest.fixture(scope="session") def neon_api_key() -> str: @@ -1093,7 +1096,7 @@ class NeonEnv: "pg_auth_type": pg_auth_type, "http_auth_type": http_auth_type, # Default which can be overriden with `NeonEnvBuilder.pageserver_config_override` - "availability_zone": "us-east-2a", + "availability_zone": DEFAULT_AZ_ID, # Disable pageserver disk syncs in tests: when running tests concurrently, this avoids # the pageserver taking a long time to start up due to syncfs flushing other tests' data "no_sync": True, diff --git a/test_runner/regress/test_sharding.py b/test_runner/regress/test_sharding.py index 743ab0088b..4c381b563f 100644 --- a/test_runner/regress/test_sharding.py +++ b/test_runner/regress/test_sharding.py @@ -11,6 +11,7 @@ from fixtures.common_types import Lsn, TenantId, TenantShardId, TimelineArchival from fixtures.compute_reconfigure import ComputeReconfigure from fixtures.log_helper import log from fixtures.neon_fixtures import ( + DEFAULT_AZ_ID, NeonEnv, NeonEnvBuilder, StorageControllerApiException, @@ -793,6 +794,7 @@ def test_sharding_split_stripe_size( "tenant_id": str(env.initial_tenant), "stripe_size": None, "shards": [{"node_id": int(env.pageservers[0].id), "shard_number": 0}], + "preferred_az": DEFAULT_AZ_ID, } assert notifications[0] == expect @@ -812,6 +814,7 @@ def test_sharding_split_stripe_size( {"node_id": int(env.pageservers[0].id), "shard_number": 0}, {"node_id": int(env.pageservers[0].id), "shard_number": 1}, ], + "preferred_az": DEFAULT_AZ_ID, } log.info(f"Got notification: {notifications[1]}") assert notifications[1] == expect_after diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py index ae9b596a1b..0be800d103 100644 --- 
a/test_runner/regress/test_storage_controller.py +++ b/test_runner/regress/test_storage_controller.py @@ -16,6 +16,7 @@ from fixtures.common_types import TenantId, TenantShardId, TimelineId from fixtures.compute_reconfigure import ComputeReconfigure from fixtures.log_helper import log from fixtures.neon_fixtures import ( + DEFAULT_AZ_ID, NeonEnv, NeonEnvBuilder, NeonPageserver, @@ -599,6 +600,7 @@ def test_storage_controller_compute_hook( "tenant_id": str(env.initial_tenant), "stripe_size": None, "shards": [{"node_id": int(env.pageservers[0].id), "shard_number": 0}], + "preferred_az": DEFAULT_AZ_ID, } assert notifications[0] == expect @@ -616,6 +618,7 @@ def test_storage_controller_compute_hook( "tenant_id": str(env.initial_tenant), "stripe_size": None, "shards": [{"node_id": int(env.pageservers[1].id), "shard_number": 0}], + "preferred_az": DEFAULT_AZ_ID, } def received_migration_notification(): @@ -643,6 +646,7 @@ def test_storage_controller_compute_hook( {"node_id": int(env.pageservers[1].id), "shard_number": 0}, {"node_id": int(env.pageservers[1].id), "shard_number": 1}, ], + "preferred_az": DEFAULT_AZ_ID, } def received_split_notification(): @@ -714,6 +718,7 @@ def test_storage_controller_stuck_compute_hook( "tenant_id": str(env.initial_tenant), "stripe_size": None, "shards": [{"node_id": int(env.pageservers[0].id), "shard_number": 0}], + "preferred_az": DEFAULT_AZ_ID, } assert notifications[0] == expect From 2ee6bc5ec42590e958fc246092f8e98202fc9173 Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Tue, 17 Dec 2024 20:06:18 +0000 Subject: [PATCH 33/89] chore(proxy): update vendored postgres libs to edition 2021 (#10139) I ran `cargo fix --edition` in each project prior, and it found nothing that needed fixing. --- libs/proxy/postgres-protocol2/Cargo.toml | 2 +- libs/proxy/postgres-protocol2/src/lib.rs | 3 +-- libs/proxy/postgres-protocol2/src/message/frontend.rs | 1 - libs/proxy/postgres-types2/Cargo.toml | 2 +- libs/proxy/postgres-types2/src/lib.rs | 3 +-- libs/proxy/tokio-postgres2/Cargo.toml | 2 +- libs/proxy/tokio-postgres2/src/lib.rs | 2 +- 7 files changed, 6 insertions(+), 9 deletions(-) diff --git a/libs/proxy/postgres-protocol2/Cargo.toml b/libs/proxy/postgres-protocol2/Cargo.toml index f71c1599c7..f66a292d5e 100644 --- a/libs/proxy/postgres-protocol2/Cargo.toml +++ b/libs/proxy/postgres-protocol2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "postgres-protocol2" version = "0.1.0" -edition = "2018" +edition = "2021" license = "MIT/Apache-2.0" [dependencies] diff --git a/libs/proxy/postgres-protocol2/src/lib.rs b/libs/proxy/postgres-protocol2/src/lib.rs index 947f2f835d..6032440f9a 100644 --- a/libs/proxy/postgres-protocol2/src/lib.rs +++ b/libs/proxy/postgres-protocol2/src/lib.rs @@ -9,8 +9,7 @@ //! //! This library assumes that the `client_encoding` backend parameter has been //! set to `UTF8`. It will most likely not behave properly if that is not the case. 
-#![doc(html_root_url = "https://docs.rs/postgres-protocol/0.6")] -#![warn(missing_docs, rust_2018_idioms, clippy::all)] +#![warn(missing_docs, clippy::all)] use byteorder::{BigEndian, ByteOrder}; use bytes::{BufMut, BytesMut}; diff --git a/libs/proxy/postgres-protocol2/src/message/frontend.rs b/libs/proxy/postgres-protocol2/src/message/frontend.rs index bc6168f337..640f35ada3 100644 --- a/libs/proxy/postgres-protocol2/src/message/frontend.rs +++ b/libs/proxy/postgres-protocol2/src/message/frontend.rs @@ -3,7 +3,6 @@ use byteorder::{BigEndian, ByteOrder}; use bytes::{Buf, BufMut, BytesMut}; -use std::convert::TryFrom; use std::error::Error; use std::io; use std::marker; diff --git a/libs/proxy/postgres-types2/Cargo.toml b/libs/proxy/postgres-types2/Cargo.toml index 58cfb5571f..57efd94cd3 100644 --- a/libs/proxy/postgres-types2/Cargo.toml +++ b/libs/proxy/postgres-types2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "postgres-types2" version = "0.1.0" -edition = "2018" +edition = "2021" license = "MIT/Apache-2.0" [dependencies] diff --git a/libs/proxy/postgres-types2/src/lib.rs b/libs/proxy/postgres-types2/src/lib.rs index 18ba032151..d4f3afdfd4 100644 --- a/libs/proxy/postgres-types2/src/lib.rs +++ b/libs/proxy/postgres-types2/src/lib.rs @@ -2,8 +2,7 @@ //! //! This crate is used by the `tokio-postgres` and `postgres` crates. You normally don't need to depend directly on it //! unless you want to define your own `ToSql` or `FromSql` definitions. -#![doc(html_root_url = "https://docs.rs/postgres-types/0.2")] -#![warn(clippy::all, rust_2018_idioms, missing_docs)] +#![warn(clippy::all, missing_docs)] use fallible_iterator::FallibleIterator; use postgres_protocol2::types; diff --git a/libs/proxy/tokio-postgres2/Cargo.toml b/libs/proxy/tokio-postgres2/Cargo.toml index 7130c1b726..56e7c4da47 100644 --- a/libs/proxy/tokio-postgres2/Cargo.toml +++ b/libs/proxy/tokio-postgres2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tokio-postgres2" version = "0.1.0" -edition = "2018" +edition = "2021" license = "MIT/Apache-2.0" [dependencies] diff --git a/libs/proxy/tokio-postgres2/src/lib.rs b/libs/proxy/tokio-postgres2/src/lib.rs index 901ed0c96c..9155dd8279 100644 --- a/libs/proxy/tokio-postgres2/src/lib.rs +++ b/libs/proxy/tokio-postgres2/src/lib.rs @@ -1,5 +1,5 @@ //! An asynchronous, pipelined, PostgreSQL client. -#![warn(rust_2018_idioms, clippy::all)] +#![warn(clippy::all)] pub use crate::cancel_token::CancelToken; pub use crate::client::{Client, SocketConfig}; From c52514ab02b96e273f8a99362e2401e4a7bc3982 Mon Sep 17 00:00:00 2001 From: a-masterov <72613290+a-masterov@users.noreply.github.com> Date: Tue, 17 Dec 2024 21:47:44 +0100 Subject: [PATCH 34/89] Fix allure report creation on periodic `pg_regress` testing (#10171) ## Problem The allure report finishes with the error `HttpError: Resource not accessible by integration` while running the `pg_regress` test against a cloud staging project due to a lack of permissions. ## Summary of changes The permissions are added. 
--- .github/workflows/cloud-regress.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/cloud-regress.yml b/.github/workflows/cloud-regress.yml index 55f42ea533..09d6acd325 100644 --- a/.github/workflows/cloud-regress.yml +++ b/.github/workflows/cloud-regress.yml @@ -21,6 +21,8 @@ concurrency: permissions: id-token: write # aws-actions/configure-aws-credentials + statuses: write + contents: write jobs: regress: From aaf980f70d38d6a6a54494aea1b6b16e5328abf5 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Wed, 18 Dec 2024 11:34:38 +0200 Subject: [PATCH 35/89] Online checkpoint replication state (#9976) ## Problem See https://neondb.slack.com/archives/C04DGM6SMTM/p1733180965970089 Replication state is checkpointed only by shutdown checkpoint. It means that replication snapshots are not removed till compute shutdown. ## Summary of changes Checkpoint replication state during online checkpoint Related Postgres PR: https://github.com/neondatabase/postgres/pull/546 Co-authored-by: Konstantin Knizhnik --- vendor/postgres-v17 | 2 +- vendor/revisions.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/vendor/postgres-v17 b/vendor/postgres-v17 index 65c4e46baf..7e3f3974bc 160000 --- a/vendor/postgres-v17 +++ b/vendor/postgres-v17 @@ -1 +1 @@ -Subproject commit 65c4e46baf56ec05412c7dd63d62faff0b33dcfb +Subproject commit 7e3f3974bc8895938308f94d0e96879ffae638cd diff --git a/vendor/revisions.json b/vendor/revisions.json index c8db81c73f..bff2f70931 100644 --- a/vendor/revisions.json +++ b/vendor/revisions.json @@ -1,7 +1,7 @@ { "v17": [ "17.2", - "65c4e46baf56ec05412c7dd63d62faff0b33dcfb" + "7e3f3974bc8895938308f94d0e96879ffae638cd" ], "v16": [ "16.6", From 85696297c5dc15b74384441f318c9381d99086e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Wed, 18 Dec 2024 13:47:56 +0100 Subject: [PATCH 36/89] Add safekeepers command to storcon_cli for listing (#10151) Add a `safekeepers` subcommand to `storcon_cli` that allows listing the safekeepers. ``` $ curl -X POST --url http://localhost:1234/control/v1/safekeeper/42 --data \ '{"active":true, "id":42, "created_at":"2023-10-25T09:11:25Z", "updated_at":"2024-08-28T11:32:43Z","region_id":"neon_local","host":"localhost","port":5454,"http_port":0,"version":123,"availability_zone_id":"us-east-2b"}' $ cargo run --bin storcon_cli -- --api http://localhost:1234 safekeepers Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.38s Running `target/debug/storcon_cli --api 'http://localhost:1234' safekeepers` +----+---------+-----------+------+-----------+------------+ | Id | Version | Host | Port | Http Port | AZ Id | +==========================================================+ | 42 | 123 | localhost | 5454 | 0 | us-east-2b | +----+---------+-----------+------+-----------+------------+ ``` Also: * Don't return the raw `SafekeeperPersistence` struct that contains the raw database presentation, but instead a new `SafekeeperDescribeResponse` struct. * The `SafekeeperPersistence` struct leaves out the `active` field on purpose because we want to deprecate it and replace it with a `scheduling_policy` one. 
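For reference, a sketch of one element of the listing response as seen by a client, assuming the usual serde JSON serialization of the new `SafekeeperDescribeResponse` fields; the values simply echo the curl example above rather than real data:

```python
# Hypothetical element returned by GET control/v1/safekeeper (values taken from the example above).
example_safekeeper = {
    "id": 42,
    "region_id": "neon_local",
    "version": 123,  # from the release-<n> tag; 1 means "just created"
    "host": "localhost",
    "port": 5454,
    "http_port": 0,
    "availability_zone_id": "us-east-2b",
    # The deprecated `active` flag is intentionally not part of this response.
}
```
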
Part of https://github.com/neondatabase/neon/issues/9981 --- control_plane/storcon_cli/src/main.rs | 30 ++++++++++++++++++- libs/pageserver_api/src/controller_api.rs | 17 +++++++++++ storage_controller/src/persistence.rs | 13 ++++++++ storage_controller/src/service.rs | 26 +++++++++++----- .../regress/test_storage_controller.py | 2 +- 5 files changed, 78 insertions(+), 10 deletions(-) diff --git a/control_plane/storcon_cli/src/main.rs b/control_plane/storcon_cli/src/main.rs index df07216fde..6ee1044c18 100644 --- a/control_plane/storcon_cli/src/main.rs +++ b/control_plane/storcon_cli/src/main.rs @@ -5,7 +5,8 @@ use clap::{Parser, Subcommand}; use pageserver_api::{ controller_api::{ AvailabilityZone, NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse, - ShardSchedulingPolicy, TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest, + SafekeeperDescribeResponse, ShardSchedulingPolicy, TenantCreateRequest, + TenantDescribeResponse, TenantPolicyRequest, }, models::{ EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary, @@ -211,6 +212,8 @@ enum Command { #[arg(long)] timeout: humantime::Duration, }, + /// List safekeepers known to the storage controller + Safekeepers {}, } #[derive(Parser)] @@ -1020,6 +1023,31 @@ async fn main() -> anyhow::Result<()> { "Fill was cancelled for node {node_id}. Schedulling policy is now {final_policy:?}" ); } + Command::Safekeepers {} => { + let mut resp = storcon_client + .dispatch::<(), Vec>( + Method::GET, + "control/v1/safekeeper".to_string(), + None, + ) + .await?; + + resp.sort_by(|a, b| a.id.cmp(&b.id)); + + let mut table = comfy_table::Table::new(); + table.set_header(["Id", "Version", "Host", "Port", "Http Port", "AZ Id"]); + for sk in resp { + table.add_row([ + format!("{}", sk.id), + format!("{}", sk.version), + sk.host, + format!("{}", sk.port), + format!("{}", sk.http_port), + sk.availability_zone_id.to_string(), + ]); + } + println!("{table}"); + } } Ok(()) diff --git a/libs/pageserver_api/src/controller_api.rs b/libs/pageserver_api/src/controller_api.rs index ec7b81423a..faf11e487c 100644 --- a/libs/pageserver_api/src/controller_api.rs +++ b/libs/pageserver_api/src/controller_api.rs @@ -372,6 +372,23 @@ pub struct MetadataHealthListOutdatedResponse { pub health_records: Vec, } +/// Publicly exposed safekeeper description +/// +/// The `active` flag which we have in the DB is not included on purpose: it is deprecated. +#[derive(Serialize, Deserialize, Clone)] +pub struct SafekeeperDescribeResponse { + pub id: NodeId, + pub region_id: String, + /// 1 is special, it means just created (not currently posted to storcon). + /// Zero or negative is not really expected. + /// Otherwise the number from `release-$(number_of_commits_on_branch)` tag. 
+ pub version: i64, + pub host: String, + pub port: i32, + pub http_port: i32, + pub availability_zone_id: String, +} + #[cfg(test)] mod test { use super::*; diff --git a/storage_controller/src/persistence.rs b/storage_controller/src/persistence.rs index e17fe78d25..cc377e606e 100644 --- a/storage_controller/src/persistence.rs +++ b/storage_controller/src/persistence.rs @@ -11,6 +11,7 @@ use diesel::Connection; use itertools::Itertools; use pageserver_api::controller_api::AvailabilityZone; use pageserver_api::controller_api::MetadataHealthRecord; +use pageserver_api::controller_api::SafekeeperDescribeResponse; use pageserver_api::controller_api::ShardSchedulingPolicy; use pageserver_api::controller_api::{NodeSchedulingPolicy, PlacementPolicy}; use pageserver_api::models::TenantConfig; @@ -1241,6 +1242,18 @@ impl SafekeeperPersistence { availability_zone_id: &self.availability_zone_id, } } + pub(crate) fn as_describe_response(&self) -> SafekeeperDescribeResponse { + // omit the `active` flag on purpose: it is deprecated. + SafekeeperDescribeResponse { + id: NodeId(self.id as u64), + region_id: self.region_id.clone(), + version: self.version, + host: self.host.clone(), + port: self.port, + http_port: self.http_port, + availability_zone_id: self.availability_zone_id.clone(), + } + } } #[derive(Insertable, AsChangeset)] diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 42b50835f8..c0c5bc371a 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -46,10 +46,11 @@ use pageserver_api::{ controller_api::{ AvailabilityZone, MetadataHealthRecord, MetadataHealthUpdateRequest, NodeAvailability, NodeRegisterRequest, NodeSchedulingPolicy, NodeShard, NodeShardResponse, PlacementPolicy, - ShardSchedulingPolicy, ShardsPreferredAzsRequest, ShardsPreferredAzsResponse, - TenantCreateRequest, TenantCreateResponse, TenantCreateResponseShard, - TenantDescribeResponse, TenantDescribeResponseShard, TenantLocateResponse, - TenantPolicyRequest, TenantShardMigrateRequest, TenantShardMigrateResponse, + SafekeeperDescribeResponse, ShardSchedulingPolicy, ShardsPreferredAzsRequest, + ShardsPreferredAzsResponse, TenantCreateRequest, TenantCreateResponse, + TenantCreateResponseShard, TenantDescribeResponse, TenantDescribeResponseShard, + TenantLocateResponse, TenantPolicyRequest, TenantShardMigrateRequest, + TenantShardMigrateResponse, }, models::{ SecondaryProgress, TenantConfigPatchRequest, TenantConfigRequest, @@ -7169,15 +7170,24 @@ impl Service { pub(crate) async fn safekeepers_list( &self, - ) -> Result, DatabaseError> { - self.persistence.list_safekeepers().await + ) -> Result, DatabaseError> { + Ok(self + .persistence + .list_safekeepers() + .await? 
+ .into_iter() + .map(|v| v.as_describe_response()) + .collect::>()) } pub(crate) async fn get_safekeeper( &self, id: i64, - ) -> Result { - self.persistence.safekeeper_get(id).await + ) -> Result { + self.persistence + .safekeeper_get(id) + .await + .map(|v| v.as_describe_response()) } pub(crate) async fn upsert_safekeeper( diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py index 0be800d103..7062c35e05 100644 --- a/test_runner/regress/test_storage_controller.py +++ b/test_runner/regress/test_storage_controller.py @@ -3009,7 +3009,7 @@ def test_safekeeper_deployment_time_update(neon_env_builder: NeonEnvBuilder): def eq_safekeeper_records(a: dict[str, Any], b: dict[str, Any]) -> bool: compared = [dict(a), dict(b)] - masked_keys = ["created_at", "updated_at"] + masked_keys = ["created_at", "updated_at", "active"] for d in compared: # keep deleting these in case we are comparing the body as it will be uploaded by real scripts From 1d12efc42886bcb204db4c764ea64413da5c8dba Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Wed, 18 Dec 2024 10:37:26 -0500 Subject: [PATCH 37/89] fix(pageserver): allow repartition errors during gc-compaction smoke tests (#10164) ## Problem part of https://github.com/neondatabase/neon/issues/9114 In https://github.com/neondatabase/neon/pull/10127 we fixed the race, but we didn't add the errors to the allowlist. ## Summary of changes * Allow repartition errors in the gc-compaction smoke test. I think it might be worth to refactor the code to allow multiple threads getting a copy of repartition status (i.e., using Rcu) in the future. Signed-off-by: Alex Chi Z --- pageserver/src/tenant/timeline/compaction.rs | 2 +- test_runner/regress/test_compaction.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 8b6cc8ed84..a4e8f39522 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -1823,7 +1823,7 @@ impl Timeline { // by estimating the amount of files read for a compaction job. We should also partition on LSN. 
let ((dense_ks, sparse_ks), _) = { let Ok(partition) = self.partitioning.try_lock() else { - bail!("failed to acquire partition lock"); + bail!("failed to acquire partition lock during gc-compaction"); }; partition.clone() }; diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py index 88873c63c2..aef9a825ee 100644 --- a/test_runner/regress/test_compaction.py +++ b/test_runner/regress/test_compaction.py @@ -134,6 +134,10 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder): } env = neon_env_builder.init_start(initial_tenant_conf=SMOKE_CONF) + env.pageserver.allowed_errors.append( + r".*failed to acquire partition lock during gc-compaction.*" + ) + env.pageserver.allowed_errors.append(r".*repartition() called concurrently.*") tenant_id = env.initial_tenant timeline_id = env.initial_timeline From 1668d39b7cbe7d1bbe48152cb1b4024a6e2da90a Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Wed, 18 Dec 2024 16:51:53 +0100 Subject: [PATCH 38/89] safekeeper: fix typo in allowlist for `/profile/heap` (#10186) --- safekeeper/src/http/routes.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/safekeeper/src/http/routes.rs b/safekeeper/src/http/routes.rs index 9bc1bf3409..6186f4c3ba 100644 --- a/safekeeper/src/http/routes.rs +++ b/safekeeper/src/http/routes.rs @@ -564,7 +564,7 @@ pub fn make_router( if conf.http_auth.is_some() { router = router.middleware(auth_middleware(|request| { const ALLOWLIST_ROUTES: &[&str] = - &["/v1/status", "/metrics", "/profile/cpu", "profile/heap"]; + &["/v1/status", "/metrics", "/profile/cpu", "/profile/heap"]; if ALLOWLIST_ROUTES.contains(&request.uri().path()) { None } else { From d63602cc7822eb5f9670d7e7926bb412eba1ff3a Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Wed, 18 Dec 2024 16:03:14 +0000 Subject: [PATCH 39/89] chore(proxy): fully remove allow-self-signed-compute flag (#10168) When https://github.com/neondatabase/cloud/pull/21856 is merged, this flag is no longer necessary. --- proxy/src/bin/local_proxy.rs | 1 - proxy/src/bin/proxy.rs | 7 --- proxy/src/cancellation.rs | 44 +++++++---------- proxy/src/compute.rs | 69 +++------------------------ proxy/src/config.rs | 1 - proxy/src/console_redirect_proxy.rs | 1 - proxy/src/control_plane/mod.rs | 5 +- proxy/src/proxy/connect_compute.rs | 9 +--- proxy/src/proxy/mod.rs | 2 - test_runner/fixtures/neon_fixtures.py | 1 - 10 files changed, 25 insertions(+), 115 deletions(-) diff --git a/proxy/src/bin/local_proxy.rs b/proxy/src/bin/local_proxy.rs index 968682cf0f..56bbd94850 100644 --- a/proxy/src/bin/local_proxy.rs +++ b/proxy/src/bin/local_proxy.rs @@ -271,7 +271,6 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig Ok(Box::leak(Box::new(ProxyConfig { tls_config: None, metric_collection: None, - allow_self_signed_compute: false, http_config, authentication_config: AuthenticationConfig { jwks_cache: JwkCache::default(), diff --git a/proxy/src/bin/proxy.rs b/proxy/src/bin/proxy.rs index e90555e250..3dcf9ca060 100644 --- a/proxy/src/bin/proxy.rs +++ b/proxy/src/bin/proxy.rs @@ -129,9 +129,6 @@ struct ProxyCliArgs { /// lock for `connect_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable). 
#[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK)] connect_compute_lock: String, - /// Allow self-signed certificates for compute nodes (for testing) - #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)] - allow_self_signed_compute: bool, #[clap(flatten)] sql_over_http: SqlOverHttpArgs, /// timeout for scram authentication protocol @@ -564,9 +561,6 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> { _ => bail!("either both or neither tls-key and tls-cert must be specified"), }; - if args.allow_self_signed_compute { - warn!("allowing self-signed compute certificates"); - } let backup_metric_collection_config = config::MetricBackupCollectionConfig { interval: args.metric_backup_collection_interval, remote_storage_config: args.metric_backup_collection_remote_storage.clone(), @@ -641,7 +635,6 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> { let config = ProxyConfig { tls_config, metric_collection, - allow_self_signed_compute: args.allow_self_signed_compute, http_config, authentication_config, proxy_protocol_v2: args.proxy_protocol_v2, diff --git a/proxy/src/cancellation.rs b/proxy/src/cancellation.rs index a58e3961da..ebaea173ae 100644 --- a/proxy/src/cancellation.rs +++ b/proxy/src/cancellation.rs @@ -4,7 +4,8 @@ use std::sync::Arc; use dashmap::DashMap; use ipnet::{IpNet, Ipv4Net, Ipv6Net}; use once_cell::sync::OnceCell; -use postgres_client::{tls::MakeTlsConnect, CancelToken}; +use postgres_client::tls::MakeTlsConnect; +use postgres_client::CancelToken; use pq_proto::CancelKeyData; use rustls::crypto::ring; use thiserror::Error; @@ -14,17 +15,16 @@ use tracing::{debug, info}; use uuid::Uuid; use crate::auth::{check_peer_addr_is_in_list, IpPattern}; +use crate::compute::load_certs; use crate::error::ReportableError; use crate::ext::LockExt; use crate::metrics::{CancellationRequest, CancellationSource, Metrics}; +use crate::postgres_rustls::MakeRustlsConnect; use crate::rate_limiter::LeakyBucketRateLimiter; use crate::redis::cancellation_publisher::{ CancellationPublisher, CancellationPublisherMut, RedisPublisherClient, }; -use crate::compute::{load_certs, AcceptEverythingVerifier}; -use crate::postgres_rustls::MakeRustlsConnect; - pub type CancelMap = Arc>>; pub type CancellationHandlerMain = CancellationHandler>>>; pub(crate) type CancellationHandlerMainInternal = Option>>; @@ -240,7 +240,6 @@ pub struct CancelClosure { cancel_token: CancelToken, ip_allowlist: Vec, hostname: String, // for pg_sni router - allow_self_signed_compute: bool, } impl CancelClosure { @@ -249,45 +248,34 @@ impl CancelClosure { cancel_token: CancelToken, ip_allowlist: Vec, hostname: String, - allow_self_signed_compute: bool, ) -> Self { Self { socket_addr, cancel_token, ip_allowlist, hostname, - allow_self_signed_compute, } } /// Cancels the query running on user's compute node. pub(crate) async fn try_cancel_query(self) -> Result<(), CancelError> { let socket = TcpStream::connect(self.socket_addr).await?; - let client_config = if self.allow_self_signed_compute { - // Allow all certificates for creating the connection. 
Used only for tests - let verifier = Arc::new(AcceptEverythingVerifier); - rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider())) - .with_safe_default_protocol_versions() - .expect("ring should support the default protocol versions") - .dangerous() - .with_custom_certificate_verifier(verifier) - } else { - let root_store = TLS_ROOTS - .get_or_try_init(load_certs) - .map_err(|_e| { - CancelError::IO(std::io::Error::new( - std::io::ErrorKind::Other, - "TLS root store initialization failed".to_string(), - )) - })? - .clone(); + let root_store = TLS_ROOTS + .get_or_try_init(load_certs) + .map_err(|_e| { + CancelError::IO(std::io::Error::new( + std::io::ErrorKind::Other, + "TLS root store initialization failed".to_string(), + )) + })? + .clone(); + + let client_config = rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider())) .with_safe_default_protocol_versions() .expect("ring should support the default protocol versions") .with_root_certificates(root_store) - }; - - let client_config = client_config.with_no_client_auth(); + .with_no_client_auth(); let mut mk_tls = crate::postgres_rustls::MakeRustlsConnect::new(client_config); let tls = >::make_tls_connect( diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index 42df5ff5e3..8dc9b59e81 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -10,7 +10,6 @@ use postgres_client::tls::MakeTlsConnect; use postgres_client::{CancelToken, RawConnection}; use postgres_protocol::message::backend::NoticeResponseBody; use pq_proto::StartupMessageParams; -use rustls::client::danger::ServerCertVerifier; use rustls::crypto::ring; use rustls::pki_types::InvalidDnsNameError; use thiserror::Error; @@ -251,7 +250,6 @@ impl ConnCfg { pub(crate) async fn connect( &self, ctx: &RequestContext, - allow_self_signed_compute: bool, aux: MetricsAuxInfo, timeout: Duration, ) -> Result { @@ -259,25 +257,17 @@ impl ConnCfg { let (socket_addr, stream, host) = self.connect_raw(timeout).await?; drop(pause); - let client_config = if allow_self_signed_compute { - // Allow all certificates for creating the connection - let verifier = Arc::new(AcceptEverythingVerifier); - rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider())) - .with_safe_default_protocol_versions() - .expect("ring should support the default protocol versions") - .dangerous() - .with_custom_certificate_verifier(verifier) - } else { - let root_store = TLS_ROOTS - .get_or_try_init(load_certs) - .map_err(ConnectionError::TlsCertificateError)? - .clone(); + let root_store = TLS_ROOTS + .get_or_try_init(load_certs) + .map_err(ConnectionError::TlsCertificateError)? 
+ .clone(); + + let client_config = rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider())) .with_safe_default_protocol_versions() .expect("ring should support the default protocol versions") .with_root_certificates(root_store) - }; - let client_config = client_config.with_no_client_auth(); + .with_no_client_auth(); let mut mk_tls = crate::postgres_rustls::MakeRustlsConnect::new(client_config); let tls = >::make_tls_connect( @@ -320,7 +310,6 @@ impl ConnCfg { }, vec![], host.to_string(), - allow_self_signed_compute, ); let connection = PostgresConnection { @@ -365,50 +354,6 @@ pub(crate) fn load_certs() -> Result, Vec> = OnceCell::new(); -#[derive(Debug)] -pub(crate) struct AcceptEverythingVerifier; -impl ServerCertVerifier for AcceptEverythingVerifier { - fn supported_verify_schemes(&self) -> Vec { - use rustls::SignatureScheme; - // The schemes for which `SignatureScheme::supported_in_tls13` returns true. - vec![ - SignatureScheme::ECDSA_NISTP521_SHA512, - SignatureScheme::ECDSA_NISTP384_SHA384, - SignatureScheme::ECDSA_NISTP256_SHA256, - SignatureScheme::RSA_PSS_SHA512, - SignatureScheme::RSA_PSS_SHA384, - SignatureScheme::RSA_PSS_SHA256, - SignatureScheme::ED25519, - ] - } - fn verify_server_cert( - &self, - _end_entity: &rustls::pki_types::CertificateDer<'_>, - _intermediates: &[rustls::pki_types::CertificateDer<'_>], - _server_name: &rustls::pki_types::ServerName<'_>, - _ocsp_response: &[u8], - _now: rustls::pki_types::UnixTime, - ) -> Result { - Ok(rustls::client::danger::ServerCertVerified::assertion()) - } - fn verify_tls12_signature( - &self, - _message: &[u8], - _cert: &rustls::pki_types::CertificateDer<'_>, - _dss: &rustls::DigitallySignedStruct, - ) -> Result { - Ok(rustls::client::danger::HandshakeSignatureValid::assertion()) - } - fn verify_tls13_signature( - &self, - _message: &[u8], - _cert: &rustls::pki_types::CertificateDer<'_>, - _dss: &rustls::DigitallySignedStruct, - ) -> Result { - Ok(rustls::client::danger::HandshakeSignatureValid::assertion()) - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/proxy/src/config.rs b/proxy/src/config.rs index debd77ac32..33d1d2e9e4 100644 --- a/proxy/src/config.rs +++ b/proxy/src/config.rs @@ -25,7 +25,6 @@ use crate::types::Host; pub struct ProxyConfig { pub tls_config: Option, pub metric_collection: Option, - pub allow_self_signed_compute: bool, pub http_config: HttpConfig, pub authentication_config: AuthenticationConfig, pub proxy_protocol_v2: ProxyProtocolV2, diff --git a/proxy/src/console_redirect_proxy.rs b/proxy/src/console_redirect_proxy.rs index 02398fb777..c477822e85 100644 --- a/proxy/src/console_redirect_proxy.rs +++ b/proxy/src/console_redirect_proxy.rs @@ -213,7 +213,6 @@ pub(crate) async fn handle_client( params_compat: true, params: ¶ms, locks: &config.connect_compute_locks, - allow_self_signed_compute: config.allow_self_signed_compute, }, &user_info, config.wake_compute_retry_config, diff --git a/proxy/src/control_plane/mod.rs b/proxy/src/control_plane/mod.rs index c0718920b4..0ca1a6aae0 100644 --- a/proxy/src/control_plane/mod.rs +++ b/proxy/src/control_plane/mod.rs @@ -73,12 +73,9 @@ impl NodeInfo { pub(crate) async fn connect( &self, ctx: &RequestContext, - allow_self_signed_compute: bool, timeout: Duration, ) -> Result { - self.config - .connect(ctx, allow_self_signed_compute, self.aux.clone(), timeout) - .await + self.config.connect(ctx, self.aux.clone(), timeout).await } pub(crate) fn reuse_settings(&mut self, other: Self) { diff --git a/proxy/src/proxy/connect_compute.rs 
b/proxy/src/proxy/connect_compute.rs index 6da4c90a53..4a30d23985 100644 --- a/proxy/src/proxy/connect_compute.rs +++ b/proxy/src/proxy/connect_compute.rs @@ -73,9 +73,6 @@ pub(crate) struct TcpMechanism<'a> { /// connect_to_compute concurrency lock pub(crate) locks: &'static ApiLocks, - - /// Whether we should accept self-signed certificates (for testing) - pub(crate) allow_self_signed_compute: bool, } #[async_trait] @@ -93,11 +90,7 @@ impl ConnectMechanism for TcpMechanism<'_> { ) -> Result { let host = node_info.config.get_host(); let permit = self.locks.get_permit(&host).await?; - permit.release_result( - node_info - .connect(ctx, self.allow_self_signed_compute, timeout) - .await, - ) + permit.release_result(node_info.connect(ctx, timeout).await) } fn update_connect_config(&self, config: &mut compute::ConnCfg) { diff --git a/proxy/src/proxy/mod.rs b/proxy/src/proxy/mod.rs index 4e5ecda237..dbe174cab7 100644 --- a/proxy/src/proxy/mod.rs +++ b/proxy/src/proxy/mod.rs @@ -348,8 +348,6 @@ pub(crate) async fn handle_client( params_compat, params: ¶ms, locks: &config.connect_compute_locks, - // only used for console redirect testing. - allow_self_signed_compute: false, }, &user_info, config.wake_compute_retry_config, diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 2553a0c99a..9f78ad120b 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -3222,7 +3222,6 @@ class NeonProxy(PgProtocol): # Link auth backend params *["--auth-backend", "link"], *["--uri", NeonProxy.link_auth_uri], - *["--allow-self-signed-compute", "true"], ] class ProxyV1(AuthBackend): From 835287ba3aa1e6a4fea2c1929fbd601de9354218 Mon Sep 17 00:00:00 2001 From: John Spray Date: Wed, 18 Dec 2024 16:29:47 +0000 Subject: [PATCH 40/89] neon_local: add a `flock` to protect against concurrent execution (#10185) ## Problem `neon_local` has always been unsafe to run concurrently with itself: it uses simple text files for persistent state, and concurrent runs will step on each other. In some test environments we intentionally handle this with mutexes in python land, but it's fragile to try and always remember to do that. ## Summary of changes - Add a `flock` based mutex around the `main` function of neon_local, using the repo directory as the file to lock - Clean up an Option<> around control_plane_api, this is a drive-by change because it was one of the fields that had a weird effect when previous concurrent stuff stamped on it. 
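The guard follows the classic flock(2) advisory-lock pattern: open the repo directory, take an exclusive lock on its file descriptor, and keep the `File` alive for the rest of `main` so the kernel releases the lock on exit (including on crash). A minimal standalone sketch of that pattern, assuming the same `nix` crate the patch uses (an illustration, not the neon_local code itself):

```
use std::fs::File;
use std::os::fd::AsRawFd;
use std::path::Path;

use anyhow::Result;
use nix::fcntl::{flock, FlockArg};

/// Holding the `File` open is what holds the lock; dropping it (or process exit)
/// releases it.
struct DirLock {
    _file: File,
}

fn lock_dir_exclusive(path: &Path) -> Result<DirLock> {
    let file = File::open(path)?;
    // Blocks until no other process holds an exclusive flock on this directory.
    flock(file.as_raw_fd(), FlockArg::LockExclusive)?;
    Ok(DirLock { _file: file })
}
```

Because flock(2) is advisory, this only serializes processes that take the same lock (i.e. concurrent `neon_local` invocations); it does not affect unrelated tools touching the repo directory.
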
--- control_plane/src/bin/neon_local.rs | 59 ++++++++++++++++--------- control_plane/src/local_env.rs | 10 ++--- control_plane/src/pageserver.rs | 26 +++++------ control_plane/src/storage_controller.rs | 4 +- 4 files changed, 57 insertions(+), 42 deletions(-) diff --git a/control_plane/src/bin/neon_local.rs b/control_plane/src/bin/neon_local.rs index 1ea443b026..c73debae4c 100644 --- a/control_plane/src/bin/neon_local.rs +++ b/control_plane/src/bin/neon_local.rs @@ -19,6 +19,7 @@ use control_plane::storage_controller::{ NeonStorageControllerStartArgs, NeonStorageControllerStopArgs, StorageController, }; use control_plane::{broker, local_env}; +use nix::fcntl::{flock, FlockArg}; use pageserver_api::config::{ DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT, DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT, @@ -36,6 +37,8 @@ use safekeeper_api::{ }; use std::borrow::Cow; use std::collections::{BTreeSet, HashMap}; +use std::fs::File; +use std::os::fd::AsRawFd; use std::path::PathBuf; use std::process::exit; use std::str::FromStr; @@ -689,6 +692,21 @@ struct TimelineTreeEl { pub children: BTreeSet, } +/// A flock-based guard over the neon_local repository directory +struct RepoLock { + _file: File, +} + +impl RepoLock { + fn new() -> Result { + let repo_dir = File::open(local_env::base_path())?; + let repo_dir_fd = repo_dir.as_raw_fd(); + flock(repo_dir_fd, FlockArg::LockExclusive)?; + + Ok(Self { _file: repo_dir }) + } +} + // Main entry point for the 'neon_local' CLI utility // // This utility helps to manage neon installation. That includes following: @@ -700,9 +718,14 @@ fn main() -> Result<()> { let cli = Cli::parse(); // Check for 'neon init' command first. - let subcommand_result = if let NeonLocalCmd::Init(args) = cli.command { - handle_init(&args).map(|env| Some(Cow::Owned(env))) + let (subcommand_result, _lock) = if let NeonLocalCmd::Init(args) = cli.command { + (handle_init(&args).map(|env| Some(Cow::Owned(env))), None) } else { + // This tool uses a collection of simple files to store its state, and consequently + // it is not generally safe to run multiple commands concurrently. Rather than expect + // all callers to know this, use a lock file to protect against concurrent execution. 
+ let _repo_lock = RepoLock::new().unwrap(); + // all other commands need an existing config let env = LocalEnv::load_config(&local_env::base_path()).context("Error loading config")?; let original_env = env.clone(); @@ -728,11 +751,12 @@ fn main() -> Result<()> { NeonLocalCmd::Mappings(subcmd) => handle_mappings(&subcmd, env), }; - if &original_env != env { + let subcommand_result = if &original_env != env { subcommand_result.map(|()| Some(Cow::Borrowed(env))) } else { subcommand_result.map(|()| None) - } + }; + (subcommand_result, Some(_repo_lock)) }; match subcommand_result { @@ -922,7 +946,7 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result { } else { // User (likely interactive) did not provide a description of the environment, give them the default NeonLocalInitConf { - control_plane_api: Some(Some(DEFAULT_PAGESERVER_CONTROL_PLANE_API.parse().unwrap())), + control_plane_api: Some(DEFAULT_PAGESERVER_CONTROL_PLANE_API.parse().unwrap()), broker: NeonBroker { listen_addr: DEFAULT_BROKER_ADDR.parse().unwrap(), }, @@ -1718,18 +1742,15 @@ async fn handle_start_all_impl( broker::start_broker_process(env, &retry_timeout).await }); - // Only start the storage controller if the pageserver is configured to need it - if env.control_plane_api.is_some() { - js.spawn(async move { - let storage_controller = StorageController::from_env(env); - storage_controller - .start(NeonStorageControllerStartArgs::with_default_instance_id( - retry_timeout, - )) - .await - .map_err(|e| e.context("start storage_controller")) - }); - } + js.spawn(async move { + let storage_controller = StorageController::from_env(env); + storage_controller + .start(NeonStorageControllerStartArgs::with_default_instance_id( + retry_timeout, + )) + .await + .map_err(|e| e.context("start storage_controller")) + }); for ps_conf in &env.pageservers { js.spawn(async move { @@ -1774,10 +1795,6 @@ async fn neon_start_status_check( const RETRY_INTERVAL: Duration = Duration::from_millis(100); const NOTICE_AFTER_RETRIES: Duration = Duration::from_secs(5); - if env.control_plane_api.is_none() { - return Ok(()); - } - let storcon = StorageController::from_env(env); let retries = retry_timeout.as_millis() / RETRY_INTERVAL.as_millis(); diff --git a/control_plane/src/local_env.rs b/control_plane/src/local_env.rs index 032c88a829..489f9c8509 100644 --- a/control_plane/src/local_env.rs +++ b/control_plane/src/local_env.rs @@ -76,7 +76,7 @@ pub struct LocalEnv { // Control plane upcall API for pageserver: if None, we will not run storage_controller If set, this will // be propagated into each pageserver's configuration. - pub control_plane_api: Option, + pub control_plane_api: Url, // Control plane upcall API for storage controller. If set, this will be propagated into the // storage controller's configuration. 
@@ -133,7 +133,7 @@ pub struct NeonLocalInitConf { pub storage_controller: Option, pub pageservers: Vec, pub safekeepers: Vec, - pub control_plane_api: Option>, + pub control_plane_api: Option, pub control_plane_compute_hook_api: Option>, } @@ -535,7 +535,7 @@ impl LocalEnv { storage_controller, pageservers, safekeepers, - control_plane_api, + control_plane_api: control_plane_api.unwrap(), control_plane_compute_hook_api, branch_name_mappings, } @@ -638,7 +638,7 @@ impl LocalEnv { storage_controller: self.storage_controller.clone(), pageservers: vec![], // it's skip_serializing anyway safekeepers: self.safekeepers.clone(), - control_plane_api: self.control_plane_api.clone(), + control_plane_api: Some(self.control_plane_api.clone()), control_plane_compute_hook_api: self.control_plane_compute_hook_api.clone(), branch_name_mappings: self.branch_name_mappings.clone(), }, @@ -768,7 +768,7 @@ impl LocalEnv { storage_controller: storage_controller.unwrap_or_default(), pageservers: pageservers.iter().map(Into::into).collect(), safekeepers, - control_plane_api: control_plane_api.unwrap_or_default(), + control_plane_api: control_plane_api.unwrap(), control_plane_compute_hook_api: control_plane_compute_hook_api.unwrap_or_default(), branch_name_mappings: Default::default(), }; diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index 9d3f018345..ef5b3d6593 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -95,21 +95,19 @@ impl PageServerNode { let mut overrides = vec![pg_distrib_dir_param, broker_endpoint_param]; - if let Some(control_plane_api) = &self.env.control_plane_api { - overrides.push(format!( - "control_plane_api='{}'", - control_plane_api.as_str() - )); + overrides.push(format!( + "control_plane_api='{}'", + self.env.control_plane_api.as_str() + )); - // Storage controller uses the same auth as pageserver: if JWT is enabled - // for us, we will also need it to talk to them. - if matches!(conf.http_auth_type, AuthType::NeonJWT) { - let jwt_token = self - .env - .generate_auth_token(&Claims::new(None, Scope::GenerationsApi)) - .unwrap(); - overrides.push(format!("control_plane_api_token='{}'", jwt_token)); - } + // Storage controller uses the same auth as pageserver: if JWT is enabled + // for us, we will also need it to talk to them. + if matches!(conf.http_auth_type, AuthType::NeonJWT) { + let jwt_token = self + .env + .generate_auth_token(&Claims::new(None, Scope::GenerationsApi)) + .unwrap(); + overrides.push(format!("control_plane_api_token='{}'", jwt_token)); } if !conf.other.contains_key("remote_storage") { diff --git a/control_plane/src/storage_controller.rs b/control_plane/src/storage_controller.rs index b70bd2e1b5..22d2420ed4 100644 --- a/control_plane/src/storage_controller.rs +++ b/control_plane/src/storage_controller.rs @@ -338,7 +338,7 @@ impl StorageController { .port(), ) } else { - let listen_url = self.env.control_plane_api.clone().unwrap(); + let listen_url = self.env.control_plane_api.clone(); let listen = format!( "{}:{}", @@ -708,7 +708,7 @@ impl StorageController { } else { // The configured URL has the /upcall path prefix for pageservers to use: we will strip that out // for general purpose API access. - let listen_url = self.env.control_plane_api.clone().unwrap(); + let listen_url = self.env.control_plane_api.clone(); Url::from_str(&format!( "http://{}:{}/{path}", listen_url.host_str().unwrap(), From 3d1c3a80ae0fd2babe42d7fc2293cb2a058ff8cb Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." 
<4198311+skyzh@users.noreply.github.com> Date: Wed, 18 Dec 2024 13:09:02 -0500 Subject: [PATCH 41/89] feat(pageserver): add compact queue http endpoint (#10173) ## Problem We cannot get the size of the compaction queue and access the info. Part of #9114 ## Summary of changes * Add an API endpoint to get the compaction queue. * gc_compaction test case now waits until the compaction finishes. --------- Signed-off-by: Alex Chi Z --- libs/pageserver_api/src/models.rs | 64 +++++++++++++++++++++++++ pageserver/src/http/routes.rs | 36 +++++++++++++- pageserver/src/tenant.rs | 23 +++++++-- pageserver/src/tenant/timeline.rs | 63 ++---------------------- test_runner/fixtures/pageserver/http.py | 13 ++++- test_runner/regress/test_compaction.py | 6 +++ 6 files changed, 139 insertions(+), 66 deletions(-) diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 5690b643f0..f3fc9fad76 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -6,6 +6,7 @@ pub mod utilization; use camino::Utf8PathBuf; pub use utilization::PageserverUtilization; +use core::ops::Range; use std::{ collections::HashMap, fmt::Display, @@ -28,6 +29,7 @@ use utils::{ }; use crate::{ + key::Key, reltag::RelTag, shard::{ShardCount, ShardStripeSize, TenantShardId}, }; @@ -210,6 +212,68 @@ pub enum TimelineState { Broken { reason: String, backtrace: String }, } +#[serde_with::serde_as] +#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)] +pub struct CompactLsnRange { + pub start: Lsn, + pub end: Lsn, +} + +#[serde_with::serde_as] +#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)] +pub struct CompactKeyRange { + #[serde_as(as = "serde_with::DisplayFromStr")] + pub start: Key, + #[serde_as(as = "serde_with::DisplayFromStr")] + pub end: Key, +} + +impl From> for CompactLsnRange { + fn from(range: Range) -> Self { + Self { + start: range.start, + end: range.end, + } + } +} + +impl From> for CompactKeyRange { + fn from(range: Range) -> Self { + Self { + start: range.start, + end: range.end, + } + } +} + +impl From for Range { + fn from(range: CompactLsnRange) -> Self { + range.start..range.end + } +} + +impl From for Range { + fn from(range: CompactKeyRange) -> Self { + range.start..range.end + } +} + +impl CompactLsnRange { + pub fn above(lsn: Lsn) -> Self { + Self { + start: lsn, + end: Lsn::MAX, + } + } +} + +#[derive(Debug, Clone, Serialize)] +pub struct CompactInfoResponse { + pub compact_key_range: Option, + pub compact_lsn_range: Option, + pub sub_compaction: bool, +} + #[derive(Serialize, Deserialize, Clone)] pub struct TimelineCreateRequest { pub new_timeline_id: TimelineId, diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index db7d293856..60ef4c3702 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -97,8 +97,8 @@ use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError}; use crate::DEFAULT_PG_VERSION; use crate::{disk_usage_eviction_task, tenant}; use pageserver_api::models::{ - StatusResponse, TenantConfigRequest, TenantInfo, TimelineCreateRequest, TimelineGcRequest, - TimelineInfo, + CompactInfoResponse, StatusResponse, TenantConfigRequest, TenantInfo, TimelineCreateRequest, + TimelineGcRequest, TimelineInfo, }; use utils::{ auth::SwappableJwtAuth, @@ -2039,6 +2039,34 @@ async fn timeline_cancel_compact_handler( .await } +// Get compact info of a timeline +async fn timeline_compact_info_handler( + request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { + 
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?; + let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; + check_permission(&request, Some(tenant_shard_id.tenant_id))?; + let state = get_state(&request); + async { + let tenant = state + .tenant_manager + .get_attached_tenant_shard(tenant_shard_id)?; + let res = tenant.get_scheduled_compaction_tasks(timeline_id); + let mut resp = Vec::new(); + for item in res { + resp.push(CompactInfoResponse { + compact_key_range: item.compact_key_range, + compact_lsn_range: item.compact_lsn_range, + sub_compaction: item.sub_compaction, + }); + } + json_response(StatusCode::OK, resp) + } + .instrument(info_span!("timeline_compact_info", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id)) + .await +} + // Run compaction immediately on given timeline. async fn timeline_compact_handler( mut request: Request, @@ -3400,6 +3428,10 @@ pub fn make_router( "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/do_gc", |r| api_handler(r, timeline_gc_handler), ) + .get( + "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact", + |r| api_handler(r, timeline_compact_info_handler), + ) .put( "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact", |r| api_handler(r, timeline_compact_handler), diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 99289d5f15..2e4c47c6e4 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -3122,6 +3122,23 @@ impl Tenant { } } + pub(crate) fn get_scheduled_compaction_tasks( + &self, + timeline_id: TimelineId, + ) -> Vec { + use itertools::Itertools; + let guard = self.scheduled_compaction_tasks.lock().unwrap(); + guard + .get(&timeline_id) + .map(|tline_pending_tasks| { + tline_pending_tasks + .iter() + .map(|x| x.options.clone()) + .collect_vec() + }) + .unwrap_or_default() + } + /// Schedule a compaction task for a timeline. 
pub(crate) async fn schedule_compaction( &self, @@ -5759,13 +5776,13 @@ mod tests { use timeline::{CompactOptions, DeltaLayerTestDesc}; use utils::id::TenantId; + #[cfg(feature = "testing")] + use models::CompactLsnRange; #[cfg(feature = "testing")] use pageserver_api::record::NeonWalRecord; #[cfg(feature = "testing")] use timeline::compaction::{KeyHistoryRetention, KeyLogAtLsn}; #[cfg(feature = "testing")] - use timeline::CompactLsnRange; - #[cfg(feature = "testing")] use timeline::GcInfo; static TEST_KEY: Lazy = @@ -9634,7 +9651,7 @@ mod tests { #[cfg(feature = "testing")] #[tokio::test] async fn test_simple_bottom_most_compaction_on_branch() -> anyhow::Result<()> { - use timeline::CompactLsnRange; + use models::CompactLsnRange; let harness = TenantHarness::create("test_simple_bottom_most_compaction_on_branch").await?; let (tenant, ctx) = harness.load().await; diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 87f5a03382..e71cb4db80 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -31,9 +31,9 @@ use pageserver_api::{ }, keyspace::{KeySpaceAccum, KeySpaceRandomAccum, SparseKeyPartitioning}, models::{ - CompactionAlgorithm, CompactionAlgorithmSettings, DownloadRemoteLayersTaskInfo, - DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy, InMemoryLayerInfo, LayerMapInfo, - LsnLease, TimelineState, + CompactKeyRange, CompactLsnRange, CompactionAlgorithm, CompactionAlgorithmSettings, + DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy, + InMemoryLayerInfo, LayerMapInfo, LsnLease, TimelineState, }, reltag::BlockNumber, shard::{ShardIdentity, ShardNumber, TenantShardId}, @@ -788,63 +788,6 @@ pub(crate) struct CompactRequest { pub sub_compaction_max_job_size_mb: Option, } -#[serde_with::serde_as] -#[derive(Debug, Clone, serde::Deserialize)] -pub(crate) struct CompactLsnRange { - pub start: Lsn, - pub end: Lsn, -} - -#[serde_with::serde_as] -#[derive(Debug, Clone, serde::Deserialize)] -pub(crate) struct CompactKeyRange { - #[serde_as(as = "serde_with::DisplayFromStr")] - pub start: Key, - #[serde_as(as = "serde_with::DisplayFromStr")] - pub end: Key, -} - -impl From> for CompactLsnRange { - fn from(range: Range) -> Self { - Self { - start: range.start, - end: range.end, - } - } -} - -impl From> for CompactKeyRange { - fn from(range: Range) -> Self { - Self { - start: range.start, - end: range.end, - } - } -} - -impl From for Range { - fn from(range: CompactLsnRange) -> Self { - range.start..range.end - } -} - -impl From for Range { - fn from(range: CompactKeyRange) -> Self { - range.start..range.end - } -} - -impl CompactLsnRange { - #[cfg(test)] - #[cfg(feature = "testing")] - pub fn above(lsn: Lsn) -> Self { - Self { - start: lsn, - end: Lsn::MAX, - } - } -} - #[derive(Debug, Clone, Default)] pub(crate) struct CompactOptions { pub flags: EnumSet, diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index eabdeb1053..378e568622 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -738,6 +738,18 @@ class PageserverHttpClient(requests.Session, MetricsGetter): res_json = res.json() assert res_json is None + def timeline_compact_info( + self, + tenant_id: TenantId | TenantShardId, + timeline_id: TimelineId, + ) -> Any: + res = self.get( + f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/compact", + ) + self.verbose_error(res) + res_json = res.json() + return res_json + def 
timeline_compact( self, tenant_id: TenantId | TenantShardId, @@ -749,7 +761,6 @@ class PageserverHttpClient(requests.Session, MetricsGetter): enhanced_gc_bottom_most_compaction=False, body: dict[str, Any] | None = None, ): - self.is_testing_enabled_or_skip() query = {} if force_repartition: query["force_repartition"] = "true" diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py index aef9a825ee..ae48a8fc27 100644 --- a/test_runner/regress/test_compaction.py +++ b/test_runner/regress/test_compaction.py @@ -176,6 +176,12 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder): workload.churn_rows(row_count, env.pageserver.id) + def compaction_finished(): + queue_depth = len(ps_http.timeline_compact_info(tenant_id, timeline_id)) + assert queue_depth == 0 + + wait_until(compaction_finished, timeout=60) + # ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked) env.pageserver.assert_log_contains( "scheduled_compact_timeline.*picked .* layers for compaction" From 6d3e8096fcad394d387b59fa300f07fb19613760 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Wed, 18 Dec 2024 13:10:05 -0500 Subject: [PATCH 42/89] refactor(test): tighten up test_gc_feedback (#10126) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem In https://github.com/neondatabase/neon/pull/8103 we changed the test case to have more test coverage of gc_compaction. Now that we have `test_gc_compaction_smoke`, we can revert this test case to serve its original purpose and revert the parameter changes. part of https://github.com/neondatabase/neon/issues/9114 ## Summary of changes * Revert pitr_interval from 60s to 10s. * Assert the physical/logical size ratio in the benchmark. 
--------- Signed-off-by: Alex Chi Z Co-authored-by: Arpad Müller --- test_runner/performance/test_gc_feedback.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/test_runner/performance/test_gc_feedback.py b/test_runner/performance/test_gc_feedback.py index 07f244da0c..acb7b56fd0 100644 --- a/test_runner/performance/test_gc_feedback.py +++ b/test_runner/performance/test_gc_feedback.py @@ -22,7 +22,7 @@ def gc_feedback_impl(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchma "checkpoint_distance": f"{1024 ** 2}", "compaction_target_size": f"{1024 ** 2}", # set PITR interval to be small, so we can do GC - "pitr_interval": "60 s", + "pitr_interval": "10 s", # "compaction_threshold": "3", # "image_creation_threshold": "2", } @@ -32,6 +32,7 @@ def gc_feedback_impl(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchma n_steps = 10 n_update_iters = 100 step_size = 10000 + branch_created = 0 with endpoint.cursor() as cur: cur.execute("SET statement_timeout='1000s'") cur.execute( @@ -66,6 +67,7 @@ def gc_feedback_impl(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchma if mode == "with_snapshots": if step == n_steps / 2: env.create_branch("child") + branch_created += 1 max_num_of_deltas_above_image = 0 max_total_num_of_deltas = 0 @@ -142,6 +144,15 @@ def gc_feedback_impl(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchma with layer_map_path.open("w") as f: f.write(json.dumps(client.timeline_layer_map_info(tenant_id, timeline_id))) + # We should have collected all garbage + if mode == "normal": + # in theory we should get physical size ~= logical size, but given that gc interval is 10s, + # and the layer has indexes that might contribute to the fluctuation, we allow a small margin + # of 1 here, and the end ratio we are asserting is 1 (margin) + 1 (expected) = 2. + assert physical_size / logical_size < 2 + elif mode == "with_snapshots": + assert physical_size / logical_size < (2 + branch_created) + @pytest.mark.timeout(10000) def test_gc_feedback(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker): From 61fcf64c22b7464aa9beb5f22ec9ded96891a12f Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Wed, 18 Dec 2024 21:15:38 +0200 Subject: [PATCH 43/89] Fix flukyness of test_physical_and_logical_replicaiton.py (#10176) ## Problem See https://github.com/neondatabase/neon/issues/10037 test_physical_and_logical_replication.py sometimes failed. 
## Summary of changes Add `wait_replica_caughtup` to wait for replica sync Co-authored-by: Konstantin Knizhnik --- test_runner/regress/test_physical_and_logical_replicaiton.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test_runner/regress/test_physical_and_logical_replicaiton.py b/test_runner/regress/test_physical_and_logical_replicaiton.py index ad2d0871b8..3f9824ee67 100644 --- a/test_runner/regress/test_physical_and_logical_replicaiton.py +++ b/test_runner/regress/test_physical_and_logical_replicaiton.py @@ -2,7 +2,7 @@ from __future__ import annotations import time -from fixtures.neon_fixtures import NeonEnv, logical_replication_sync +from fixtures.neon_fixtures import NeonEnv, logical_replication_sync, wait_replica_caughtup def test_physical_and_logical_replication_slot_not_copied(neon_simple_env: NeonEnv, vanilla_pg): @@ -38,6 +38,8 @@ def test_physical_and_logical_replication_slot_not_copied(neon_simple_env: NeonE for pk in range(n_records): p_cur.execute("insert into t (pk) values (%s)", (pk,)) + wait_replica_caughtup(primary, secondary) + s_cur.execute("select count(*) from t") assert s_cur.fetchall()[0][0] == n_records From cc138b56f983c8fc66e737c5e02898121fdd72ec Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Thu, 19 Dec 2024 04:45:06 -0500 Subject: [PATCH 44/89] fix(pageserver): run psql in thread to avoid blocking (#10177) ## Problem ref https://github.com/neondatabase/neon/issues/10170 ref https://github.com/neondatabase/neon/issues/9994 The psql command will block the main thread, causing other async tasks to timeout (i.e., HTTP connect). Therefore, we need to move it to an I/O executor thread. ## Summary of changes * run psql connection in a thread --------- Signed-off-by: Alex Chi Z Co-authored-by: John Spray --- .../regress/test_pageserver_layer_rolling.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/test_runner/regress/test_pageserver_layer_rolling.py b/test_runner/regress/test_pageserver_layer_rolling.py index 706da1e35e..fcc465f90a 100644 --- a/test_runner/regress/test_pageserver_layer_rolling.py +++ b/test_runner/regress/test_pageserver_layer_rolling.py @@ -22,7 +22,10 @@ CHECKPOINT_TIMEOUT_SECONDS = 60 async def run_worker_for_tenant( - env: NeonEnv, entries: int, tenant: TenantId, offset: int | None = None + env: NeonEnv, + entries: int, + tenant: TenantId, + offset: int | None = None, ) -> Lsn: if offset is None: offset = 0 @@ -37,12 +40,20 @@ async def run_worker_for_tenant( finally: await conn.close(timeout=10) - last_flush_lsn = Lsn(ep.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) + loop = asyncio.get_running_loop() + sql = await loop.run_in_executor( + None, lambda ep: ep.safe_psql("SELECT pg_current_wal_flush_lsn()"), ep + ) + last_flush_lsn = Lsn(sql[0][0]) return last_flush_lsn async def run_worker(env: NeonEnv, tenant_conf, entries: int) -> tuple[TenantId, TimelineId, Lsn]: - tenant, timeline = env.create_tenant(conf=tenant_conf) + loop = asyncio.get_running_loop() + # capture tenant_conf by specifying `tenant_conf=tenant_conf`, otherwise it will be evaluated to some random value + tenant, timeline = await loop.run_in_executor( + None, lambda tenant_conf, env: env.create_tenant(conf=tenant_conf), tenant_conf, env + ) last_flush_lsn = await run_worker_for_tenant(env, entries, tenant) return tenant, timeline, last_flush_lsn From a1b0558493930dca4f5c86db658ad295b9beabd6 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Thu, 19 Dec 2024 
11:04:17 +0100 Subject: [PATCH 45/89] fast import: importer: use aws s3 cli (#10162) ## Problem s5cmd doesn't pick up the pod service account ``` 2024/12/16 16:26:01 Ignoring, HTTP credential provider invalid endpoint host, "169.254.170.23", only loopback hosts are allowed. ERROR "ls s3://neon-dev-bulk-import-us-east-2/import-pgdata/fast-import/v1/br-wandering-hall-w2xobawv": NoCredentialProviders: no valid providers in chain. Deprecated. For verbose messaging see aws.Config.CredentialsChainVerboseErrors ``` ## Summary of changes Switch to offical CLI. ## Testing Tested the pre-merge image in staging, using `job_image` override in project settings. https://neondb.slack.com/archives/C033RQ5SPDH/p1734554944391949?thread_ts=1734368383.258759&cid=C033RQ5SPDH ## Future Work Switch back to s5cmd once https://github.com/peak/s5cmd/pull/769 gets merged. ## Refs - fixes https://github.com/neondatabase/cloud/issues/21876 --------- Co-authored-by: Gleb Novikov --- compute/compute-node.Dockerfile | 24 ++++++++++--------- compute_tools/src/bin/fast_import.rs | 10 ++++---- .../fast_import/{s5cmd.rs => aws_s3_sync.rs} | 13 ++++------ 3 files changed, 23 insertions(+), 24 deletions(-) rename compute_tools/src/bin/fast_import/{s5cmd.rs => aws_s3_sync.rs} (50%) diff --git a/compute/compute-node.Dockerfile b/compute/compute-node.Dockerfile index 9f1f3b7343..5e7b4e8287 100644 --- a/compute/compute-node.Dockerfile +++ b/compute/compute-node.Dockerfile @@ -1556,28 +1556,30 @@ RUN apt update && \ locales \ procps \ ca-certificates \ + curl \ + unzip \ $VERSION_INSTALLS && \ apt clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 -# s5cmd 2.2.2 from https://github.com/peak/s5cmd/releases/tag/v2.2.2 -# used by fast_import +# aws cli is used by fast_import (curl and unzip above are at this time only used for this installation step) ARG TARGETARCH -ADD https://github.com/peak/s5cmd/releases/download/v2.2.2/s5cmd_2.2.2_linux_$TARGETARCH.deb /tmp/s5cmd.deb RUN set -ex; \ - \ - # Determine the expected checksum based on TARGETARCH if [ "${TARGETARCH}" = "amd64" ]; then \ - CHECKSUM="392c385320cd5ffa435759a95af77c215553d967e4b1c0fffe52e4f14c29cf85"; \ + TARGETARCH_ALT="x86_64"; \ + CHECKSUM="c9a9df3770a3ff9259cb469b6179e02829687a464e0824d5c32d378820b53a00"; \ elif [ "${TARGETARCH}" = "arm64" ]; then \ - CHECKSUM="939bee3cf4b5604ddb00e67f8c157b91d7c7a5b553d1fbb6890fad32894b7b46"; \ + TARGETARCH_ALT="aarch64"; \ + CHECKSUM="8181730be7891582b38b028112e81b4899ca817e8c616aad807c9e9d1289223a"; \ else \ echo "Unsupported architecture: ${TARGETARCH}"; exit 1; \ fi; \ - \ - # Compute and validate the checksum - echo "${CHECKSUM} /tmp/s5cmd.deb" | sha256sum -c - -RUN dpkg -i /tmp/s5cmd.deb && rm /tmp/s5cmd.deb + curl -L "https://awscli.amazonaws.com/awscli-exe-linux-${TARGETARCH_ALT}-2.17.5.zip" -o /tmp/awscliv2.zip; \ + echo "${CHECKSUM} /tmp/awscliv2.zip" | sha256sum -c -; \ + unzip /tmp/awscliv2.zip -d /tmp/awscliv2; \ + /tmp/awscliv2/aws/install; \ + rm -rf /tmp/awscliv2.zip /tmp/awscliv2; \ + true ENV LANG=en_US.utf8 USER postgres diff --git a/compute_tools/src/bin/fast_import.rs b/compute_tools/src/bin/fast_import.rs index b6db3eb11a..793ec4cf10 100644 --- a/compute_tools/src/bin/fast_import.rs +++ b/compute_tools/src/bin/fast_import.rs @@ -34,12 +34,12 @@ use nix::unistd::Pid; use tracing::{info, info_span, warn, Instrument}; use utils::fs_ext::is_directory_empty; +#[path = "fast_import/aws_s3_sync.rs"] +mod aws_s3_sync; #[path = 
"fast_import/child_stdio_to_log.rs"] mod child_stdio_to_log; #[path = "fast_import/s3_uri.rs"] mod s3_uri; -#[path = "fast_import/s5cmd.rs"] -mod s5cmd; #[derive(clap::Parser)] struct Args { @@ -326,7 +326,7 @@ pub(crate) async fn main() -> anyhow::Result<()> { } info!("upload pgdata"); - s5cmd::sync(Utf8Path::new(&pgdata_dir), &s3_prefix.append("/")) + aws_s3_sync::sync(Utf8Path::new(&pgdata_dir), &s3_prefix.append("/pgdata/")) .await .context("sync dump directory to destination")?; @@ -334,10 +334,10 @@ pub(crate) async fn main() -> anyhow::Result<()> { { let status_dir = working_directory.join("status"); std::fs::create_dir(&status_dir).context("create status directory")?; - let status_file = status_dir.join("status"); + let status_file = status_dir.join("pgdata"); std::fs::write(&status_file, serde_json::json!({"done": true}).to_string()) .context("write status file")?; - s5cmd::sync(&status_file, &s3_prefix.append("/status/pgdata")) + aws_s3_sync::sync(&status_dir, &s3_prefix.append("/status/")) .await .context("sync status directory to destination")?; } diff --git a/compute_tools/src/bin/fast_import/s5cmd.rs b/compute_tools/src/bin/fast_import/aws_s3_sync.rs similarity index 50% rename from compute_tools/src/bin/fast_import/s5cmd.rs rename to compute_tools/src/bin/fast_import/aws_s3_sync.rs index d2d9a79736..5fa58c8f87 100644 --- a/compute_tools/src/bin/fast_import/s5cmd.rs +++ b/compute_tools/src/bin/fast_import/aws_s3_sync.rs @@ -4,24 +4,21 @@ use camino::Utf8Path; use super::s3_uri::S3Uri; pub(crate) async fn sync(local: &Utf8Path, remote: &S3Uri) -> anyhow::Result<()> { - let mut builder = tokio::process::Command::new("s5cmd"); - // s5cmd uses aws-sdk-go v1, hence doesn't support AWS_ENDPOINT_URL - if let Some(val) = std::env::var_os("AWS_ENDPOINT_URL") { - builder.arg("--endpoint-url").arg(val); - } + let mut builder = tokio::process::Command::new("aws"); builder + .arg("s3") .arg("sync") .arg(local.as_str()) .arg(remote.to_string()); let st = builder .spawn() - .context("spawn s5cmd")? + .context("spawn aws s3 sync")? .wait() .await - .context("wait for s5cmd")?; + .context("wait for aws s3 sync")?; if st.success() { Ok(()) } else { - Err(anyhow::anyhow!("s5cmd failed")) + Err(anyhow::anyhow!("aws s3 sync failed")) } } From 43dc03459d42ec4c7ca028e48f5a0d8994ecf983 Mon Sep 17 00:00:00 2001 From: Peter Bendel Date: Thu, 19 Dec 2024 11:25:44 +0100 Subject: [PATCH 46/89] Run pgbench on 10 GB scale factor on database with n relations (e.g. 10k) (#10172) ## Problem We want to verify how much / if pgbench throughput and latency on Neon suffers if the database contains many other relations, too. ## Summary of changes Modify the benchmarking.yml pgbench-compare job to - create an addiitional project at scale factor 10 GiB - before running pgbench add n tables (initially 10k) to the database - then compare the pgbench throughput and latency to the existing pgbench-compare at 10 Gib scale factor We use a realistic template for the n relations that is a partitioned table with some realistic data types, indexes and constraints - similar to a table that we use internally. 
Example run: https://github.com/neondatabase/neon/actions/runs/12377565956/job/34547386959 --- .github/workflows/benchmarking.yml | 25 ++- .../many_relations/create_many_relations.sql | 199 ++++++++++++++++++ .../performance/test_perf_many_relations.py | 66 ++++++ 3 files changed, 288 insertions(+), 2 deletions(-) create mode 100644 test_runner/performance/many_relations/create_many_relations.sql create mode 100644 test_runner/performance/test_perf_many_relations.py diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index bbdcf5ef49..ab0f2a6155 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -308,6 +308,7 @@ jobs: "image": [ "'"$image_default"'" ], "include": [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" }, { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" }, + { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new-many-tables","db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" }, { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" }, { "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" }, { "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "10gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" }, @@ -410,7 +411,7 @@ jobs: aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Create Neon Project - if: contains(fromJson('["neonvm-captest-new", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform) + if: contains(fromJson('["neonvm-captest-new", "neonvm-captest-new-many-tables", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform) id: create-neon-project uses: ./.github/actions/neon-project-create with: @@ -429,7 +430,7 @@ jobs: neonvm-captest-sharding-reuse) CONNSTR=${{ secrets.BENCHMARK_CAPTEST_SHARDING_CONNSTR }} ;; - neonvm-captest-new | neonvm-captest-freetier | neonvm-azure-captest-new | neonvm-azure-captest-freetier) + neonvm-captest-new | neonvm-captest-new-many-tables | neonvm-captest-freetier | neonvm-azure-captest-new | neonvm-azure-captest-freetier) CONNSTR=${{ steps.create-neon-project.outputs.dsn }} ;; rds-aurora) @@ -446,6 +447,26 @@ jobs: echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT + # we want to compare Neon project OLTP throughput and latency at scale factor 10 GB + # without (neonvm-captest-new) + # and with (neonvm-captest-new-many-tables) many relations in the database + - name: Create many relations before the run + if: contains(fromJson('["neonvm-captest-new-many-tables"]'), matrix.platform) + uses: ./.github/actions/run-python-test-set + with: + build_type: ${{ env.BUILD_TYPE }} + test_selection: performance + run_in_parallel: false + save_perf_report: ${{ env.SAVE_PERF_REPORT }} + extra_params: -m remote_cluster --timeout 21600 -k test_perf_many_relations + pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws-oicd-role-arn: ${{ 
vars.DEV_AWS_OIDC_ROLE_ARN }} + env: + BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} + VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" + PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" + TEST_NUM_RELATIONS: 10000 + - name: Benchmark init uses: ./.github/actions/run-python-test-set with: diff --git a/test_runner/performance/many_relations/create_many_relations.sql b/test_runner/performance/many_relations/create_many_relations.sql new file mode 100644 index 0000000000..1b3673c9e1 --- /dev/null +++ b/test_runner/performance/many_relations/create_many_relations.sql @@ -0,0 +1,199 @@ +-- create a schema that simulates Neon control plane operations table +-- however use partitioned operations tables with many (e.g. 500) child partition tables per table +-- in summary we create multiple of these partitioned operations tables (with 500 childs each) - until we reach the requested number of tables + + +-- first we need some other tables that can be referenced by the operations table + +-- Table for branches +CREATE TABLE public.branches ( + id text PRIMARY KEY +); + +-- Table for endpoints +CREATE TABLE public.endpoints ( + id text PRIMARY KEY +); + +-- Table for projects +CREATE TABLE public.projects ( + id text PRIMARY KEY +); + +INSERT INTO public.branches (id) +VALUES ('branch_1'); + +-- Insert one row into endpoints +INSERT INTO public.endpoints (id) +VALUES ('endpoint_1'); + +-- Insert one row into projects +INSERT INTO public.projects (id) +VALUES ('project_1'); + +-- now we create a procedure that can create n operations tables +-- we do that in a procedure to save roundtrip latency when scaling the test to many tables +-- prefix is the base table name, e.g. 'operations_scale_1000' if we create 1000 tables +CREATE OR REPLACE PROCEDURE create_partitioned_tables(prefix text, n INT) +LANGUAGE plpgsql AS $$ +DECLARE + table_name TEXT; -- Variable to hold table names dynamically + i INT; -- Counter for the loop +BEGIN + -- Loop to create n partitioned tables + FOR i IN 1..n LOOP + table_name := format('%s_%s', prefix, i); + + -- Create the partitioned table + EXECUTE format( + 'CREATE TABLE public.%s ( + project_id character varying NOT NULL, + id uuid NOT NULL, + status integer, + action character varying NOT NULL, + error character varying, + created_at timestamp with time zone NOT NULL DEFAULT now(), + updated_at timestamp with time zone NOT NULL DEFAULT now(), + spec jsonb, + retry_at timestamp with time zone, + failures_count integer DEFAULT 0, + metadata jsonb NOT NULL DEFAULT ''{}''::jsonb, + executor_id text NOT NULL, + attempt_duration_ms integer, + metrics jsonb DEFAULT ''{}''::jsonb, + branch_id text, + endpoint_id text, + next_operation_id uuid, + compute_id text, + connection_attempt_at timestamp with time zone, + concurrency_key text, + queue_id text, + CONSTRAINT %s_pkey PRIMARY KEY (id, created_at), + CONSTRAINT %s_branch_id_fk FOREIGN KEY (branch_id) REFERENCES branches(id) ON DELETE CASCADE, + CONSTRAINT %s_endpoint_id_fk FOREIGN KEY (endpoint_id) REFERENCES endpoints(id) ON DELETE CASCADE, + CONSTRAINT %s_next_operation_id_fk FOREIGN KEY (next_operation_id, created_at) REFERENCES %s(id, created_at), + CONSTRAINT %s_project_id_fk FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE + ) PARTITION BY RANGE (created_at)', + table_name, table_name, table_name, table_name, table_name, table_name, table_name + ); + + -- Add indexes for the partitioned table + EXECUTE format('CREATE INDEX index_%s_on_next_operation_id ON 
public.%s (next_operation_id)', table_name, table_name); + EXECUTE format('CREATE INDEX index_%s_on_project_id ON public.%s (project_id)', table_name, table_name); + EXECUTE format('CREATE INDEX %s_branch_id ON public.%s (branch_id)', table_name, table_name); + EXECUTE format('CREATE INDEX %s_branch_id_created_idx ON public.%s (branch_id, created_at)', table_name, table_name); + EXECUTE format('CREATE INDEX %s_created_at_idx ON public.%s (created_at)', table_name, table_name); + EXECUTE format('CREATE INDEX %s_created_at_project_id_id_cond_idx ON public.%s (created_at, project_id, id)', table_name, table_name); + EXECUTE format('CREATE INDEX %s_endpoint_id ON public.%s (endpoint_id)', table_name, table_name); + EXECUTE format( + 'CREATE INDEX %s_for_redo_worker_idx ON public.%s (executor_id) WHERE status <> 1', + table_name, table_name + ); + EXECUTE format( + 'CREATE INDEX %s_project_id_status_index ON public.%s ((project_id::text), status)', + table_name, table_name + ); + EXECUTE format( + 'CREATE INDEX %s_status_not_finished ON public.%s (status) WHERE status <> 1', + table_name, table_name + ); + EXECUTE format('CREATE INDEX %s_updated_at_desc_idx ON public.%s (updated_at DESC)', table_name, table_name); + EXECUTE format( + 'CREATE INDEX %s_with_failures ON public.%s (failures_count) WHERE failures_count > 0', + table_name, table_name + ); + END LOOP; +END; +$$; + +-- next we create a procedure that can add the child partitions (one per day) to each of the operations tables +CREATE OR REPLACE PROCEDURE create_operations_partitions( + table_name TEXT, + start_date DATE, + end_date DATE +) +LANGUAGE plpgsql AS $$ +DECLARE + partition_date DATE; + partition_name TEXT; + counter INT := 0; -- Counter to track the number of tables created in the current transaction +BEGIN + partition_date := start_date; + + -- Create partitions in batches + WHILE partition_date < end_date LOOP + partition_name := format('%s_%s', table_name, to_char(partition_date,'YYYY_MM_DD')); + + EXECUTE format( + 'CREATE TABLE IF NOT EXISTS public.%s PARTITION OF public.%s + FOR VALUES FROM (''%s'') TO (''%s'')', + partition_name, + table_name, + partition_date, + partition_date + INTERVAL '1 day' + ); + + counter := counter + 1; + + -- Commit and reset counter after every 100 partitions + IF counter >= 100 THEN + COMMIT; + counter := 0; -- Reset the counter + END IF; + + -- Advance to the next day + partition_date := partition_date + INTERVAL '1 day'; + END LOOP; + + -- Final commit for remaining partitions + IF counter > 0 THEN + COMMIT; + END IF; + + -- Insert synthetic rows into each partition + EXECUTE format( + 'INSERT INTO %I ( + project_id, + branch_id, + endpoint_id, + id, + status, + action, + created_at, + updated_at, + spec, + metadata, + executor_id, + failures_count + ) + SELECT + ''project_1'', -- project_id + ''branch_1'', -- branch_id + ''endpoint_1'', -- endpoint_id + ''e8bba687-0df9-4291-bfcd-7d5f6aa7c158'', -- unique id + 1, -- status + ''SYNTHETIC_ACTION'', -- action + gs::timestamp + interval ''0 ms'', -- created_at + gs::timestamp + interval ''1 minute'', -- updated_at + ''{"key": "value"}'', -- spec (JSONB) + ''{"metadata_key": "metadata_value"}'', -- metadata (JSONB) + ''executor_1'', -- executor_id + 0 -- failures_count + FROM generate_series(%L, %L::DATE - INTERVAL ''1 day'', INTERVAL ''1 day'') AS gs', + table_name, start_date, end_date + ); + + -- Commit the inserted rows + COMMIT; +END; +$$; + +-- we can now create partitioned tables using something like +-- CALL 
create_partitioned_tables('operations_scale_1000' ,10); + +-- and we can create the child partitions for a table using something like +-- CALL create_operations_partitions( +-- 'operations_scale_1000_1', +-- '2000-01-01', -- Start date +-- ('2000-01-01'::DATE + INTERVAL '1 day' * 500)::DATE -- End date (start date + number of days) +-- ); diff --git a/test_runner/performance/test_perf_many_relations.py b/test_runner/performance/test_perf_many_relations.py new file mode 100644 index 0000000000..0ee0efe8b9 --- /dev/null +++ b/test_runner/performance/test_perf_many_relations.py @@ -0,0 +1,66 @@ +import os +from pathlib import Path + +import pytest +from fixtures.compare_fixtures import RemoteCompare +from fixtures.log_helper import log + + +def get_num_relations(default: int = 1000) -> list[int]: + # We parametrize each run with scale specifying the number of wanted child partitions. + # Databases are pre-created and passed through BENCHMARK_CONNSTR env variable. + scales = os.getenv("TEST_NUM_RELATIONS", default=str(default)) + rv = [] + for s in scales.split(","): + scale = int(s) + rv.append(scale) + return rv + + +@pytest.mark.parametrize("num_relations", get_num_relations()) +@pytest.mark.remote_cluster +def test_perf_many_relations(remote_compare: RemoteCompare, num_relations: int): + """ + Test creating many relations in a single database. + We use partitioned tables with child tables, indexes and constraints to have a realistic schema. + Also we include some common data types like text, uuid, timestamp, JSONB, etc. + + see many_relations/create_many_relations.sql + """ + env = remote_compare + + # prepare some base tables and the plpgsql procedures that we use to create the tables + sql_file = Path(__file__).parent / "many_relations" / "create_many_relations.sql" + env.pg_bin.run_capture(["psql", env.pg.connstr(), "-f", str(sql_file)]) + + num_parent_tables = num_relations // 500 + 1 + log.info(f"Creating {num_relations} relations in {num_parent_tables} parent tables") + + log.info(f"Creating {num_parent_tables} parent tables") + sql = f"CALL create_partitioned_tables('operations_scale_{num_relations}', {num_parent_tables})" + log.info(sql) + env.pg_bin.run_capture(["psql", env.pg.connstr(), "-c", sql]) + + current_table = 0 + num_relations_remaining = num_relations + + # now run and measure the actual relation creation + while num_relations_remaining > 0: + current_table += 1 + parent_table_name = f"operations_scale_{num_relations}_{current_table}" + if num_relations_remaining > 500: + num_relations_to_create = 500 + else: + num_relations_to_create = num_relations_remaining + num_relations_remaining -= num_relations_to_create + log.info( + f"Creating {num_relations_to_create} child tables in partitioned parent table '{parent_table_name}'" + ) + sql = f"CALL create_operations_partitions( '{parent_table_name}', '2000-01-01', ('2000-01-01'::DATE + INTERVAL '1 day' * {num_relations_to_create})::DATE)" + log.info(sql) + with env.zenbenchmark.record_duration( + f"CREATE_TABLE/{current_table}/{num_relations_to_create}" + ): + env.pg_bin.run_capture( + ["psql", env.pg.connstr(options="-cstatement_timeout=1000s "), "-c", sql] + ) From b135194090369d8e5452c9ee1c6e7c37cc9ba8bd Mon Sep 17 00:00:00 2001 From: Folke Behrens Date: Thu, 19 Dec 2024 11:37:08 +0100 Subject: [PATCH 47/89] proxy: Delay SASL complete message until auth is done (#10189) The final SASL complete message can be bundled with the remainder of the auth flow messages until ReadyForQuery. 
neondatabase/cloud#19184 --- proxy/src/auth/backend/mod.rs | 3 +++ proxy/src/sasl/stream.rs | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/proxy/src/auth/backend/mod.rs b/proxy/src/auth/backend/mod.rs index 50cb94bfa0..0c9a7f7825 100644 --- a/proxy/src/auth/backend/mod.rs +++ b/proxy/src/auth/backend/mod.rs @@ -678,6 +678,9 @@ mod tests { .await .unwrap(); + // flush the final server message + stream.flush().await.unwrap(); + handle.await.unwrap(); } diff --git a/proxy/src/sasl/stream.rs b/proxy/src/sasl/stream.rs index f1c916daa2..ac77556566 100644 --- a/proxy/src/sasl/stream.rs +++ b/proxy/src/sasl/stream.rs @@ -50,6 +50,12 @@ impl SaslStream<'_, S> { self.stream.write_message(&msg.to_reply()).await?; Ok(()) } + + // Queue a SASL message for the client. + fn send_noflush(&mut self, msg: &ServerMessage<&str>) -> io::Result<()> { + self.stream.write_message_noflush(&msg.to_reply())?; + Ok(()) + } } /// SASL authentication outcome. @@ -85,7 +91,7 @@ impl SaslStream<'_, S> { continue; } Step::Success(result, reply) => { - self.send(&ServerMessage::Final(&reply)).await?; + self.send_noflush(&ServerMessage::Final(&reply))?; Outcome::Success(result) } Step::Failure(reason) => Outcome::Failure(reason), From 65042cbadd0426c43499bb7675e671b5c6e980e9 Mon Sep 17 00:00:00 2001 From: John Spray Date: Thu, 19 Dec 2024 10:58:49 +0000 Subject: [PATCH 48/89] tests: use high IO concurrency in `test_pgdata_import_smoke`, use `effective_io_concurrency=2` in tests by default (#10114) ## Problem `test_pgdata_import_smoke` writes two gigabytes of pages and then reads them back serially. This is CPU bottlenecked and results in a long runtime, and sensitivity to CPU load from other tests on the same machine. Closes: https://github.com/neondatabase/neon/issues/10071 ## Summary of changes - Use effective_io_concurrency=32 when doing sequential scans through 2GiB of pages in test_pgdata_import_smoke. This is a ~10x runtime decrease in the parts of the test that do sequential scans. - Also set `effective_io_concurrency=2` for tests, as I noticed while debugging that we were doing all getpage requests serially, which is bad for checking the stability of the batching code. --- control_plane/src/endpoint.rs | 4 ++++ test_runner/regress/test_import_pgdata.py | 14 +++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs index 1fdf326051..5ebf842813 100644 --- a/control_plane/src/endpoint.rs +++ b/control_plane/src/endpoint.rs @@ -316,6 +316,10 @@ impl Endpoint { // and can cause errors like 'no unpinned buffers available', see // conf.append("shared_buffers", "1MB"); + // Postgres defaults to effective_io_concurrency=1, which does not exercise the pageserver's + // batching logic. 
Set this to 2 so that we exercise the code a bit without letting + // individual tests do a lot of concurrent work on underpowered test machines + conf.append("effective_io_concurrency", "2"); conf.append("fsync", "off"); conf.append("max_connections", "100"); conf.append("wal_level", "logical"); diff --git a/test_runner/regress/test_import_pgdata.py b/test_runner/regress/test_import_pgdata.py index 29229b73c1..6ea2393a9d 100644 --- a/test_runner/regress/test_import_pgdata.py +++ b/test_runner/regress/test_import_pgdata.py @@ -84,6 +84,8 @@ def test_pgdata_import_smoke( elif rel_block_size == RelBlockSize.TWO_STRPES_PER_SHARD: target_relblock_size = (shard_count or 1) * stripe_size * 8192 * 2 elif rel_block_size == RelBlockSize.MULTIPLE_RELATION_SEGMENTS: + # Postgres uses a 1GiB segment size, fixed at compile time, so we must use >2GB of data + # to exercise multiple segments. target_relblock_size = int(((2.333 * 1024 * 1024 * 1024) // 8192) * 8192) else: raise ValueError @@ -111,9 +113,15 @@ def test_pgdata_import_smoke( def validate_vanilla_equivalence(ep): # TODO: would be nicer to just compare pgdump - assert ep.safe_psql("select count(*), sum(data::bigint)::bigint from t") == [ - (expect_nrows, expect_sum) - ] + + # Enable IO concurrency for batching on large sequential scan, to avoid making + # this test unnecessarily onerous on CPU + assert ep.safe_psql_many( + [ + "set effective_io_concurrency=32;", + "select count(*), sum(data::bigint)::bigint from t", + ] + ) == [[], [(expect_nrows, expect_sum)]] validate_vanilla_equivalence(vanilla_pg) From afda6d4700ca0b521ecb412f0bc81e80e5903dbd Mon Sep 17 00:00:00 2001 From: John Spray Date: Thu, 19 Dec 2024 12:55:05 +0000 Subject: [PATCH 49/89] storage_scrubber: don't report half-created timelines as corruption (#10198) ## Problem test_timeline_archival_chaos does timeline creation with failure injection, and thereby sometimes leaves timelines in a part created state. This was being reported as corruption by the scrubber on test teardown, because it considered a layer without an index to be an invalid state. This was incorrect: the scrubber should accept this state, it occurs legitimately during timeline creation. Closes: https://github.com/neondatabase/neon/issues/9988 ## Summary of changes - Report a timeline with layers but no index as Relic rather than MissingIndexPart. - We retain the MissingIndexPart variant for the case where an index _was_ found in the listing, but was not found by a subsequent GET, i.e. racing with deletion. --- storage_scrubber/src/checks.rs | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/storage_scrubber/src/checks.rs b/storage_scrubber/src/checks.rs index f759f54d19..32c86052ef 100644 --- a/storage_scrubber/src/checks.rs +++ b/storage_scrubber/src/checks.rs @@ -310,7 +310,7 @@ pub(crate) enum BlobDataParseResult { index_part_generation: Generation, s3_layers: HashSet<(LayerName, Generation)>, }, - /// The remains of a deleted Timeline (i.e. an initdb archive only) + /// The remains of an uncleanly deleted Timeline or aborted timeline creation(e.g. an initdb archive only, or some layer without an index) Relic, Incorrect { errors: Vec, @@ -346,7 +346,7 @@ pub(crate) async fn list_timeline_blobs( match res { ListTimelineBlobsResult::Ready(data) => Ok(data), ListTimelineBlobsResult::MissingIndexPart(_) => { - // Retry if index is missing. + // Retry if listing raced with removal of an index let data = list_timeline_blobs_impl(remote_client, id, root_target) .await? 
.into_data(); @@ -358,7 +358,7 @@ pub(crate) async fn list_timeline_blobs( enum ListTimelineBlobsResult { /// Blob data is ready to be intepreted. Ready(RemoteTimelineBlobData), - /// List timeline blobs has layer files but is missing [`IndexPart`]. + /// The listing contained an index but when we tried to fetch it, we couldn't MissingIndexPart(RemoteTimelineBlobData), } @@ -467,19 +467,19 @@ async fn list_timeline_blobs_impl( match index_part_object.as_ref() { Some(selected) => index_part_keys.retain(|k| k != selected), None => { - // It is possible that the branch gets deleted after we got some layer files listed - // and we no longer have the index file in the listing. - errors.push( + // This case does not indicate corruption, but it should be very unusual. It can + // happen if: + // - timeline creation is in progress (first layer is written before index is written) + // - timeline deletion happened while a stale pageserver was still attached, it might upload + // a layer after the deletion is done. + tracing::info!( "S3 list response got no index_part.json file but still has layer files" - .to_string(), ); - return Ok(ListTimelineBlobsResult::MissingIndexPart( - RemoteTimelineBlobData { - blob_data: BlobDataParseResult::Incorrect { errors, s3_layers }, - unused_index_keys: index_part_keys, - unknown_keys, - }, - )); + return Ok(ListTimelineBlobsResult::Ready(RemoteTimelineBlobData { + blob_data: BlobDataParseResult::Relic, + unused_index_keys: index_part_keys, + unknown_keys, + })); } } From 502d512fe2ca9f07392428d70d5262cf3f5103e2 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Thu, 19 Dec 2024 14:04:42 +0000 Subject: [PATCH 50/89] safekeeper: lift benchmarking utils into safekeeper crate (#10200) ## Problem The benchmarking utilities are also useful for testing. We want to write tests in the safekeeper crate. ## Summary of changes This commit lifts the utils to the safekeeper crate. They are compiled if the benchmarking features is enabled or if in test mode. --- libs/postgres_ffi/src/wal_generator.rs | 6 ++-- safekeeper/Cargo.toml | 2 ++ safekeeper/benches/receive_wal.rs | 23 +++++++-------- safekeeper/src/lib.rs | 3 ++ .../benchutils.rs => src/test_utils.rs} | 28 ++++++++++--------- .../tests/walproposer_sim/walproposer_disk.rs | 2 +- 6 files changed, 36 insertions(+), 28 deletions(-) rename safekeeper/{benches/benchutils.rs => src/test_utils.rs} (78%) diff --git a/libs/postgres_ffi/src/wal_generator.rs b/libs/postgres_ffi/src/wal_generator.rs index 69cc4b771f..a72b035e17 100644 --- a/libs/postgres_ffi/src/wal_generator.rs +++ b/libs/postgres_ffi/src/wal_generator.rs @@ -106,11 +106,11 @@ impl WalGenerator { const TIMELINE_ID: u32 = 1; /// Creates a new WAL generator with the given record generator. - pub fn new(record_generator: R) -> WalGenerator { + pub fn new(record_generator: R, start_lsn: Lsn) -> WalGenerator { Self { record_generator, - lsn: Lsn(0), - prev_lsn: Lsn(0), + lsn: start_lsn, + prev_lsn: start_lsn, } } diff --git a/safekeeper/Cargo.toml b/safekeeper/Cargo.toml index 086407603f..3ebb7097f2 100644 --- a/safekeeper/Cargo.toml +++ b/safekeeper/Cargo.toml @@ -9,6 +9,7 @@ default = [] # Enables test-only APIs, incuding failpoints. 
In particular, enables the `fail_point!` macro, # which adds some runtime cost to run tests on outage conditions testing = ["fail/failpoints"] +benchmarking = [] [dependencies] async-stream.workspace = true @@ -77,3 +78,4 @@ tracing-subscriber = { workspace = true, features = ["json"] } [[bench]] name = "receive_wal" harness = false +required-features = ["benchmarking"] diff --git a/safekeeper/benches/receive_wal.rs b/safekeeper/benches/receive_wal.rs index 313d945b94..996c4d9b8c 100644 --- a/safekeeper/benches/receive_wal.rs +++ b/safekeeper/benches/receive_wal.rs @@ -1,11 +1,7 @@ //! WAL ingestion benchmarks. -#[path = "benchutils.rs"] -mod benchutils; - use std::io::Write as _; -use benchutils::Env; use bytes::BytesMut; use camino_tempfile::tempfile; use criterion::{criterion_group, criterion_main, BatchSize, Bencher, Criterion}; @@ -16,6 +12,7 @@ use safekeeper::receive_wal::{self, WalAcceptor}; use safekeeper::safekeeper::{ AcceptorProposerMessage, AppendRequest, AppendRequestHeader, ProposerAcceptorMessage, }; +use safekeeper::test_utils::Env; use tokio::io::AsyncWriteExt as _; use utils::id::{NodeId, TenantTimelineId}; use utils::lsn::Lsn; @@ -76,12 +73,15 @@ fn bench_process_msg(c: &mut Criterion) { assert!(size >= prefixlen); let message = vec![0; size - prefixlen]; - let walgen = &mut WalGenerator::new(LogicalMessageGenerator::new(prefix, &message)); + let walgen = &mut WalGenerator::new(LogicalMessageGenerator::new(prefix, &message), Lsn(0)); // Set up the Safekeeper. let env = Env::new(fsync)?; - let mut safekeeper = - runtime.block_on(env.make_safekeeper(NodeId(1), TenantTimelineId::generate()))?; + let mut safekeeper = runtime.block_on(env.make_safekeeper( + NodeId(1), + TenantTimelineId::generate(), + Lsn(0), + ))?; b.iter_batched_ref( // Pre-construct WAL records and requests. Criterion will batch them. @@ -134,7 +134,8 @@ fn bench_wal_acceptor(c: &mut Criterion) { let runtime = tokio::runtime::Runtime::new()?; // needs multithreaded let env = Env::new(fsync)?; - let walgen = &mut WalGenerator::new(LogicalMessageGenerator::new(c"prefix", b"message")); + let walgen = + &mut WalGenerator::new(LogicalMessageGenerator::new(c"prefix", b"message"), Lsn(0)); // Create buffered channels that can fit all requests, to avoid blocking on channels. let (msg_tx, msg_rx) = tokio::sync::mpsc::channel(n); @@ -145,7 +146,7 @@ fn bench_wal_acceptor(c: &mut Criterion) { // TODO: WalAcceptor doesn't actually need a full timeline, only // Safekeeper::process_msg(). Consider decoupling them to simplify the setup. let tli = env - .make_timeline(NodeId(1), TenantTimelineId::generate()) + .make_timeline(NodeId(1), TenantTimelineId::generate(), Lsn(0)) .await? .wal_residence_guard() .await?; @@ -239,7 +240,7 @@ fn bench_wal_acceptor_throughput(c: &mut Criterion) { assert!(size >= prefixlen); let message = vec![0; size - prefixlen]; - let walgen = &mut WalGenerator::new(LogicalMessageGenerator::new(prefix, &message)); + let walgen = &mut WalGenerator::new(LogicalMessageGenerator::new(prefix, &message), Lsn(0)); // Construct and spawn the WalAcceptor task. let env = Env::new(fsync)?; @@ -249,7 +250,7 @@ fn bench_wal_acceptor_throughput(c: &mut Criterion) { runtime.block_on(async { let tli = env - .make_timeline(NodeId(1), TenantTimelineId::generate()) + .make_timeline(NodeId(1), TenantTimelineId::generate(), Lsn(0)) .await? 
.wal_residence_guard() .await?; diff --git a/safekeeper/src/lib.rs b/safekeeper/src/lib.rs index abe6e00a66..7acf355e6a 100644 --- a/safekeeper/src/lib.rs +++ b/safekeeper/src/lib.rs @@ -43,6 +43,9 @@ pub mod wal_reader_stream; pub mod wal_service; pub mod wal_storage; +#[cfg(any(test, feature = "benchmarking"))] +pub mod test_utils; + mod timelines_global_map; use std::sync::Arc; pub use timelines_global_map::GlobalTimelines; diff --git a/safekeeper/benches/benchutils.rs b/safekeeper/src/test_utils.rs similarity index 78% rename from safekeeper/benches/benchutils.rs rename to safekeeper/src/test_utils.rs index 48d796221b..c40a8bae5a 100644 --- a/safekeeper/benches/benchutils.rs +++ b/safekeeper/src/test_utils.rs @@ -1,18 +1,18 @@ use std::sync::Arc; +use crate::rate_limit::RateLimiter; +use crate::safekeeper::{ProposerAcceptorMessage, ProposerElected, SafeKeeper, TermHistory}; +use crate::state::{TimelinePersistentState, TimelineState}; +use crate::timeline::{get_timeline_dir, SharedState, StateSK, Timeline}; +use crate::timelines_set::TimelinesSet; +use crate::wal_backup::remote_timeline_path; +use crate::{control_file, wal_storage, SafeKeeperConf}; use camino_tempfile::Utf8TempDir; -use safekeeper::rate_limit::RateLimiter; -use safekeeper::safekeeper::{ProposerAcceptorMessage, ProposerElected, SafeKeeper, TermHistory}; -use safekeeper::state::{TimelinePersistentState, TimelineState}; -use safekeeper::timeline::{get_timeline_dir, SharedState, StateSK, Timeline}; -use safekeeper::timelines_set::TimelinesSet; -use safekeeper::wal_backup::remote_timeline_path; -use safekeeper::{control_file, wal_storage, SafeKeeperConf}; use tokio::fs::create_dir_all; use utils::id::{NodeId, TenantTimelineId}; use utils::lsn::Lsn; -/// A Safekeeper benchmarking environment. Uses a tempdir for storage, removed on drop. +/// A Safekeeper testing or benchmarking environment. Uses a tempdir for storage, removed on drop. pub struct Env { /// Whether to enable fsync. pub fsync: bool, @@ -21,7 +21,7 @@ pub struct Env { } impl Env { - /// Creates a new benchmarking environment in a temporary directory. fsync controls whether to + /// Creates a new test or benchmarking environment in a temporary directory. fsync controls whether to /// enable fsyncing. 
pub fn new(fsync: bool) -> anyhow::Result { let tempdir = camino_tempfile::tempdir()?; @@ -47,6 +47,7 @@ impl Env { &self, node_id: NodeId, ttid: TenantTimelineId, + start_lsn: Lsn, ) -> anyhow::Result> { let conf = self.make_conf(node_id); @@ -67,9 +68,9 @@ impl Env { safekeeper .process_msg(&ProposerAcceptorMessage::Elected(ProposerElected { term: 1, - start_streaming_at: Lsn(0), - term_history: TermHistory(vec![(1, Lsn(0)).into()]), - timeline_start_lsn: Lsn(0), + start_streaming_at: start_lsn, + term_history: TermHistory(vec![(1, start_lsn).into()]), + timeline_start_lsn: start_lsn, })) .await?; @@ -82,12 +83,13 @@ impl Env { &self, node_id: NodeId, ttid: TenantTimelineId, + start_lsn: Lsn, ) -> anyhow::Result> { let conf = Arc::new(self.make_conf(node_id)); let timeline_dir = get_timeline_dir(&conf, &ttid); let remote_path = remote_timeline_path(&ttid)?; - let safekeeper = self.make_safekeeper(node_id, ttid).await?; + let safekeeper = self.make_safekeeper(node_id, ttid, start_lsn).await?; let shared_state = SharedState::new(StateSK::Loaded(safekeeper)); let timeline = Timeline::new( diff --git a/safekeeper/tests/walproposer_sim/walproposer_disk.rs b/safekeeper/tests/walproposer_sim/walproposer_disk.rs index aefb3919a1..7dc7f48548 100644 --- a/safekeeper/tests/walproposer_sim/walproposer_disk.rs +++ b/safekeeper/tests/walproposer_sim/walproposer_disk.rs @@ -18,7 +18,7 @@ impl DiskWalProposer { internal_available_lsn: Lsn(0), prev_lsn: Lsn(0), disk: BlockStorage::new(), - wal_generator: WalGenerator::new(LogicalMessageGenerator::new(c"", &[])), + wal_generator: WalGenerator::new(LogicalMessageGenerator::new(c"", &[]), Lsn(0)), }), }) } From 628451d68ec30c0fb591e14a64f696b031d7ca88 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Thu, 19 Dec 2024 14:04:46 +0000 Subject: [PATCH 51/89] safekeeper: short-circuit interpreted wal sender (#10202) ## Problem Safekeeper may currently send a batch to the pageserver even if it hasn't decoded a new record. I think this is quite unlikely in the field, but worth adressing. ## Summary of changes Don't send anything if we haven't decoded a full record. Once this merges and releases, the `InterpretedWalRecords` struct can be updated to remove the Option wrapper for `next_record_lsn`. --- safekeeper/src/send_interpreted_wal.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/safekeeper/src/send_interpreted_wal.rs b/safekeeper/src/send_interpreted_wal.rs index 2589030422..7d215176dd 100644 --- a/safekeeper/src/send_interpreted_wal.rs +++ b/safekeeper/src/send_interpreted_wal.rs @@ -94,9 +94,14 @@ impl InterpretedWalSender<'_, IO> { } } + let max_next_record_lsn = match max_next_record_lsn { + Some(lsn) => lsn, + None => { continue; } + }; + let batch = InterpretedWalRecords { records, - next_record_lsn: max_next_record_lsn + next_record_lsn: Some(max_next_record_lsn), }; tx.send(Batch {wal_end_lsn, available_wal_end_lsn, records: batch}).await.unwrap(); From 04517c6ff3db53b7145aff41a9de208648194a6d Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Thu, 19 Dec 2024 17:22:39 +0200 Subject: [PATCH 52/89] Do not reload config file on PS reconnect (#10204) ## Problem See https://github.com/neondatabase/neon/issues/10184 and https://neondb.slack.com/archives/C04DGM6SMTM/p1733997259898819 Reloading config file inside parallel worker cause it's termination ## Summary of changes Remove call of `HandleMainLoopInterrupts()` Update of page server URL is propagated by postmaster through shared memory and we should not reload config for it. 
Co-authored-by: Konstantin Knizhnik --- pgxn/neon/libpagestore.c | 1 - 1 file changed, 1 deletion(-) diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c index 6513ba4dd6..88d0a5292b 100644 --- a/pgxn/neon/libpagestore.c +++ b/pgxn/neon/libpagestore.c @@ -827,7 +827,6 @@ pageserver_send(shardno_t shard_no, NeonRequest *request) { while (!pageserver_connect(shard_no, shard->n_reconnect_attempts < max_reconnect_attempts ? LOG : ERROR)) { - HandleMainLoopInterrupts(); shard->n_reconnect_attempts += 1; } shard->n_reconnect_attempts = 0; From b89e02f3e8efccfb900685f2d6c1fe18d13cb956 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Thu, 19 Dec 2024 13:04:53 -0500 Subject: [PATCH 53/89] fix(pageserver): consider partial compaction layer map in layer check (#10044) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem In https://github.com/neondatabase/neon/pull/9897 we temporarily disabled the layer valid check because the current one only considers the end result of all compaction algorithms, but partial gc-compaction would temporarily produce an "invalid" layer map. part of https://github.com/neondatabase/neon/issues/9114 ## Summary of changes Allow LSN splits to overlap in the slow path check. Currently, the valid check is only used in storage scrubber (background job) and during gc-compaction (without taking layer lock). Therefore, it's fine for such checks to be a little bit inefficient but more accurate. --------- Signed-off-by: Alex Chi Z Co-authored-by: Arpad Müller --- pageserver/src/tenant/checks.rs | 47 +++++++++++++------ pageserver/src/tenant/timeline/compaction.rs | 48 ++++++++++++++++---- 2 files changed, 71 insertions(+), 24 deletions(-) diff --git a/pageserver/src/tenant/checks.rs b/pageserver/src/tenant/checks.rs index 1e8fa8d1d6..f98356242e 100644 --- a/pageserver/src/tenant/checks.rs +++ b/pageserver/src/tenant/checks.rs @@ -1,12 +1,15 @@ use std::collections::BTreeSet; use itertools::Itertools; +use pageserver_compaction::helpers::overlaps_with; use super::storage_layer::LayerName; /// Checks whether a layer map is valid (i.e., is a valid result of the current compaction algorithm if nothing goes wrong). /// -/// The function checks if we can split the LSN range of a delta layer only at the LSNs of the delta layers. For example, +/// The function implements a fast path check and a slow path check. +/// +/// The fast path checks if we can split the LSN range of a delta layer only at the LSNs of the delta layers. For example, /// /// ```plain /// | | | | @@ -25,31 +28,47 @@ use super::storage_layer::LayerName; /// | | | 4 | | | /// /// If layer 2 and 4 contain the same single key, this is also a valid layer map. +/// +/// However, if a partial compaction is still going on, it is possible that we get a layer map not satisfying the above condition. +/// Therefore, we fallback to simply check if any of the two delta layers overlap. (See "A slow path...") pub fn check_valid_layermap(metadata: &[LayerName]) -> Option { let mut lsn_split_point = BTreeSet::new(); // TODO: use a better data structure (range tree / range set?) 
let mut all_delta_layers = Vec::new(); for name in metadata { if let LayerName::Delta(layer) = name { - if layer.key_range.start.next() != layer.key_range.end { - all_delta_layers.push(layer.clone()); - } + all_delta_layers.push(layer.clone()); } } for layer in &all_delta_layers { - let lsn_range = &layer.lsn_range; - lsn_split_point.insert(lsn_range.start); - lsn_split_point.insert(lsn_range.end); + if layer.key_range.start.next() != layer.key_range.end { + let lsn_range = &layer.lsn_range; + lsn_split_point.insert(lsn_range.start); + lsn_split_point.insert(lsn_range.end); + } } - for layer in &all_delta_layers { + for (idx, layer) in all_delta_layers.iter().enumerate() { + if layer.key_range.start.next() == layer.key_range.end { + continue; + } let lsn_range = layer.lsn_range.clone(); let intersects = lsn_split_point.range(lsn_range).collect_vec(); if intersects.len() > 1 { - let err = format!( - "layer violates the layer map LSN split assumption: layer {} intersects with LSN [{}]", - layer, - intersects.into_iter().map(|lsn| lsn.to_string()).join(", ") - ); - return Some(err); + // A slow path to check if the layer intersects with any other delta layer. + for (other_idx, other_layer) in all_delta_layers.iter().enumerate() { + if other_idx == idx { + // do not check self intersects with self + continue; + } + if overlaps_with(&layer.lsn_range, &other_layer.lsn_range) + && overlaps_with(&layer.key_range, &other_layer.key_range) + { + let err = format!( + "layer violates the layer map LSN split assumption: layer {} intersects with layer {}", + layer, other_layer + ); + return Some(err); + } + } } } None diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index a4e8f39522..01f2a5b170 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -29,6 +29,7 @@ use utils::id::TimelineId; use crate::context::{AccessStatsBehavior, RequestContext, RequestContextBuilder}; use crate::page_cache; use crate::statvfs::Statvfs; +use crate::tenant::checks::check_valid_layermap; use crate::tenant::remote_timeline_client::WaitCompletionError; use crate::tenant::storage_layer::batch_split_writer::{ BatchWriterResult, SplitDeltaLayerWriter, SplitImageLayerWriter, @@ -2156,15 +2157,14 @@ impl Timeline { // Step 1: construct a k-merge iterator over all layers. // Also, verify if the layer map can be split by drawing a horizontal line at every LSN start/end split point. - // disable the check for now because we need to adjust the check for partial compactions, will enable later. - // let layer_names = job_desc - // .selected_layers - // .iter() - // .map(|layer| layer.layer_desc().layer_name()) - // .collect_vec(); - // if let Some(err) = check_valid_layermap(&layer_names) { - // warn!("gc-compaction layer map check failed because {}, this is normal if partial compaction is not finished yet", err); - // } + let layer_names = job_desc + .selected_layers + .iter() + .map(|layer| layer.layer_desc().layer_name()) + .collect_vec(); + if let Some(err) = check_valid_layermap(&layer_names) { + bail!("gc-compaction layer map check failed because {}, cannot proceed with compaction due to potential data loss", err); + } // The maximum LSN we are processing in this compaction loop let end_lsn = job_desc .selected_layers @@ -2546,8 +2546,36 @@ impl Timeline { ); // Step 3: Place back to the layer map. + + // First, do a sanity check to ensure the newly-created layer map does not contain overlaps. 
+ let all_layers = { + let guard = self.layers.read().await; + let layer_map = guard.layer_map()?; + layer_map.iter_historic_layers().collect_vec() + }; + + let mut final_layers = all_layers + .iter() + .map(|layer| layer.layer_name()) + .collect::>(); + for layer in &layer_selection { + final_layers.remove(&layer.layer_desc().layer_name()); + } + for layer in &compact_to { + final_layers.insert(layer.layer_desc().layer_name()); + } + let final_layers = final_layers.into_iter().collect_vec(); + + // TODO: move this check before we call `finish` on image layer writers. However, this will require us to get the layer name before we finish + // the writer, so potentially, we will need a function like `ImageLayerBatchWriter::get_all_pending_layer_keys` to get all the keys that are + // in the writer before finalizing the persistent layers. Now we would leave some dangling layers on the disk if the check fails. + if let Some(err) = check_valid_layermap(&final_layers) { + bail!("gc-compaction layer map check failed after compaction because {}, compaction result not applied to the layer map due to potential data loss", err); + } + + // Between the sanity check and this compaction update, there could be new layers being flushed, but it should be fine because we only + // operate on L1 layers. { - // TODO: sanity check if the layer map is valid (i.e., should not have overlaps) let mut guard = self.layers.write().await; guard .open_mut()? From 197a89ab3dee0ff90b060c92032a1a8e0b3213a8 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Thu, 19 Dec 2024 20:32:32 +0200 Subject: [PATCH 54/89] =?UTF-8?q?Increase=20default=20stotrage=20controlle?= =?UTF-8?q?r=20heartbeat=20interval=20from=20100msec=20=E2=80=A6=20(#10206?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem Currently default value of storage controller heartbeat interval is 100msec. It means that 10 times per second it establish connection to PS. And it seems to be quite expensive. At MacOS right now storage_controller consumes 70% CPU and trusts - 30%. So together they completely utilize one core. A lot of us has Macs. Let's save environment a little bit and do not waste electricity and contribute to global warming. By the way, on prod we have interval 10seconds ## Summary of changes Increase heartbeat interval from 100msec to 1 second. Co-authored-by: Konstantin Knizhnik --- control_plane/src/local_env.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/control_plane/src/local_env.rs b/control_plane/src/local_env.rs index 489f9c8509..5b82acb3a5 100644 --- a/control_plane/src/local_env.rs +++ b/control_plane/src/local_env.rs @@ -180,7 +180,7 @@ impl NeonStorageControllerConf { const DEFAULT_MAX_WARMING_UP_INTERVAL: std::time::Duration = std::time::Duration::from_secs(30); // Very tight heartbeat interval to speed up tests - const DEFAULT_HEARTBEAT_INTERVAL: std::time::Duration = std::time::Duration::from_millis(100); + const DEFAULT_HEARTBEAT_INTERVAL: std::time::Duration = std::time::Duration::from_millis(1000); } impl Default for NeonStorageControllerConf { From 9c53b41245e3aecba30c2e05df4eeabe45fd39ac Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." 
<4198311+skyzh@users.noreply.github.com> Date: Thu, 19 Dec 2024 13:40:20 -0500 Subject: [PATCH 55/89] fix(pageserver): update remote latest_gc_cutoff after gc-compaction (#10209) ## Problem close https://github.com/neondatabase/neon/issues/10208 part of #9114 ## Summary of changes * Ensure remote `latest_gc_cutoff` is up-to-date before removing any files for gc-compaction. Signed-off-by: Alex Chi Z --- pageserver/src/tenant/timeline/compaction.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 01f2a5b170..94c65631b2 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -2581,6 +2581,13 @@ impl Timeline { .open_mut()? .finish_gc_compaction(&layer_selection, &compact_to, &self.metrics) }; + + // Schedule an index-only upload to update the `latest_gc_cutoff` in the index_part.json. + // Otherwise, after restart, the index_part only contains the old `latest_gc_cutoff` and + // find_gc_cutoffs will try accessing things below the cutoff. TODO: ideally, this should + // be batched into `schedule_compaction_update`. + let disk_consistent_lsn = self.disk_consistent_lsn.load(); + self.schedule_uploads(disk_consistent_lsn, None)?; self.remote_client .schedule_compaction_update(&layer_selection, &compact_to)?; From f94248a5941dc0ba38ea8fc94ebc49016caef162 Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Thu, 2 Jan 2025 09:35:28 +0000 Subject: [PATCH 56/89] chore(libs/proxy): refactor tokio-postgres connection control flow (#10247) In #10207 it was clear there was some confusion with the current connection logic. To analyse the flow to make sure there was no poll stalling, I ended up with the following refactor. Notable changes: 1. Now all functions called `poll_xyz` and that have a `cx: &mut Context` argument must return a `Poll<_>` type, and can only return `Pending` iff an internal poll call also returned `Pending` 2. State management is handled entirely by `poll_messages`. There are now only 2 states which makes it much easier to keep track of. Each commit should be self-reviewable and should be simple to verify that it keeps the same behaviour --- libs/proxy/tokio-postgres2/src/connection.rs | 147 ++++++++++--------- 1 file changed, 81 insertions(+), 66 deletions(-) diff --git a/libs/proxy/tokio-postgres2/src/connection.rs b/libs/proxy/tokio-postgres2/src/connection.rs index 0aa5c77e22..f478717e0d 100644 --- a/libs/proxy/tokio-postgres2/src/connection.rs +++ b/libs/proxy/tokio-postgres2/src/connection.rs @@ -33,10 +33,14 @@ pub struct Response { #[derive(PartialEq, Debug)] enum State { Active, - Terminating, Closing, } +enum WriteReady { + Terminating, + WaitingOnRead, +} + /// A connection to a PostgreSQL database. /// /// This is one half of what is returned when a new connection is established. It performs the actual IO with the @@ -51,7 +55,6 @@ pub struct Connection { /// HACK: we need this in the Neon Proxy to forward params. 
pub parameters: HashMap, receiver: mpsc::UnboundedReceiver, - pending_request: Option, pending_responses: VecDeque, responses: VecDeque, state: State, @@ -72,7 +75,6 @@ where stream, parameters, receiver, - pending_request: None, pending_responses, responses: VecDeque::new(), state: State::Active, @@ -93,26 +95,23 @@ where .map(|o| o.map(|r| r.map_err(Error::io))) } - fn poll_read(&mut self, cx: &mut Context<'_>) -> Result, Error> { - if self.state != State::Active { - trace!("poll_read: done"); - return Ok(None); - } - + /// Read and process messages from the connection to postgres. + /// client <- postgres + fn poll_read(&mut self, cx: &mut Context<'_>) -> Poll> { loop { let message = match self.poll_response(cx)? { Poll::Ready(Some(message)) => message, - Poll::Ready(None) => return Err(Error::closed()), + Poll::Ready(None) => return Poll::Ready(Err(Error::closed())), Poll::Pending => { trace!("poll_read: waiting on response"); - return Ok(None); + return Poll::Pending; } }; let (mut messages, request_complete) = match message { BackendMessage::Async(Message::NoticeResponse(body)) => { let error = DbError::parse(&mut body.fields()).map_err(Error::parse)?; - return Ok(Some(AsyncMessage::Notice(error))); + return Poll::Ready(Ok(AsyncMessage::Notice(error))); } BackendMessage::Async(Message::NotificationResponse(body)) => { let notification = Notification { @@ -120,7 +119,7 @@ where channel: body.channel().map_err(Error::parse)?.to_string(), payload: body.message().map_err(Error::parse)?.to_string(), }; - return Ok(Some(AsyncMessage::Notification(notification))); + return Poll::Ready(Ok(AsyncMessage::Notification(notification))); } BackendMessage::Async(Message::ParameterStatus(body)) => { self.parameters.insert( @@ -139,8 +138,10 @@ where let mut response = match self.responses.pop_front() { Some(response) => response, None => match messages.next().map_err(Error::parse)? { - Some(Message::ErrorResponse(error)) => return Err(Error::db(error)), - _ => return Err(Error::unexpected_message()), + Some(Message::ErrorResponse(error)) => { + return Poll::Ready(Err(Error::db(error))) + } + _ => return Poll::Ready(Err(Error::unexpected_message())), }, }; @@ -164,18 +165,14 @@ where request_complete, }); trace!("poll_read: waiting on sender"); - return Ok(None); + return Poll::Pending; } } } } + /// Fetch the next client request and enqueue the response sender. fn poll_request(&mut self, cx: &mut Context<'_>) -> Poll> { - if let Some(messages) = self.pending_request.take() { - trace!("retrying pending request"); - return Poll::Ready(Some(messages)); - } - if self.receiver.is_closed() { return Poll::Ready(None); } @@ -193,74 +190,80 @@ where } } - fn poll_write(&mut self, cx: &mut Context<'_>) -> Result { + /// Process client requests and write them to the postgres connection, flushing if necessary. + /// client -> postgres + fn poll_write(&mut self, cx: &mut Context<'_>) -> Poll> { loop { - if self.state == State::Closing { - trace!("poll_write: done"); - return Ok(false); - } - if Pin::new(&mut self.stream) .poll_ready(cx) .map_err(Error::io)? .is_pending() { trace!("poll_write: waiting on socket"); - return Ok(false); + + // poll_ready is self-flushing. 
+ return Poll::Pending; } - let request = match self.poll_request(cx) { - Poll::Ready(Some(request)) => request, - Poll::Ready(None) if self.responses.is_empty() && self.state == State::Active => { + match self.poll_request(cx) { + // send the message to postgres + Poll::Ready(Some(RequestMessages::Single(request))) => { + Pin::new(&mut self.stream) + .start_send(request) + .map_err(Error::io)?; + } + // No more messages from the client, and no more responses to wait for. + // Send a terminate message to postgres + Poll::Ready(None) if self.responses.is_empty() => { trace!("poll_write: at eof, terminating"); - self.state = State::Terminating; let mut request = BytesMut::new(); frontend::terminate(&mut request); - RequestMessages::Single(FrontendMessage::Raw(request.freeze())) + let request = FrontendMessage::Raw(request.freeze()); + + Pin::new(&mut self.stream) + .start_send(request) + .map_err(Error::io)?; + + trace!("poll_write: sent eof, closing"); + trace!("poll_write: done"); + return Poll::Ready(Ok(WriteReady::Terminating)); } + // No more messages from the client, but there are still some responses to wait for. Poll::Ready(None) => { trace!( "poll_write: at eof, pending responses {}", self.responses.len() ); - return Ok(true); + ready!(self.poll_flush(cx))?; + return Poll::Ready(Ok(WriteReady::WaitingOnRead)); } + // Still waiting for a message from the client. Poll::Pending => { trace!("poll_write: waiting on request"); - return Ok(true); - } - }; - - match request { - RequestMessages::Single(request) => { - Pin::new(&mut self.stream) - .start_send(request) - .map_err(Error::io)?; - if self.state == State::Terminating { - trace!("poll_write: sent eof, closing"); - self.state = State::Closing; - } + ready!(self.poll_flush(cx))?; + return Poll::Pending; } } } } - fn poll_flush(&mut self, cx: &mut Context<'_>) -> Result<(), Error> { + fn poll_flush(&mut self, cx: &mut Context<'_>) -> Poll> { match Pin::new(&mut self.stream) .poll_flush(cx) .map_err(Error::io)? { - Poll::Ready(()) => trace!("poll_flush: flushed"), - Poll::Pending => trace!("poll_flush: waiting on socket"), + Poll::Ready(()) => { + trace!("poll_flush: flushed"); + Poll::Ready(Ok(())) + } + Poll::Pending => { + trace!("poll_flush: waiting on socket"); + Poll::Pending + } } - Ok(()) } fn poll_shutdown(&mut self, cx: &mut Context<'_>) -> Poll> { - if self.state != State::Closing { - return Poll::Pending; - } - match Pin::new(&mut self.stream) .poll_close(cx) .map_err(Error::io)? @@ -289,18 +292,30 @@ where &mut self, cx: &mut Context<'_>, ) -> Poll>> { - let message = self.poll_read(cx)?; - let want_flush = self.poll_write(cx)?; - if want_flush { - self.poll_flush(cx)?; + if self.state != State::Closing { + // if the state is still active, try read from and write to postgres. + let message = self.poll_read(cx)?; + let closing = self.poll_write(cx)?; + if let Poll::Ready(WriteReady::Terminating) = closing { + self.state = State::Closing; + } + + if let Poll::Ready(message) = message { + return Poll::Ready(Some(Ok(message))); + } + + // poll_read returned Pending. + // poll_write returned Pending or Ready(WriteReady::WaitingOnRead). + // if poll_write returned Ready(WriteReady::WaitingOnRead), then we are waiting to read more data from postgres. 
+ if self.state != State::Closing { + return Poll::Pending; + } } - match message { - Some(message) => Poll::Ready(Some(Ok(message))), - None => match self.poll_shutdown(cx) { - Poll::Ready(Ok(())) => Poll::Ready(None), - Poll::Ready(Err(e)) => Poll::Ready(Some(Err(e))), - Poll::Pending => Poll::Pending, - }, + + match self.poll_shutdown(cx) { + Poll::Ready(Ok(())) => Poll::Ready(None), + Poll::Ready(Err(e)) => Poll::Ready(Some(Err(e))), + Poll::Pending => Poll::Pending, } } } From 38c7a2abfc40bbbe2c952ed634aaab32fb100fcc Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Thu, 2 Jan 2025 09:36:13 +0000 Subject: [PATCH 57/89] chore(proxy): pre-load native tls certificates and propagate compute client config (#10182) Now that we construct the TLS client config for cancellation as well as connect, it feels appropriate to construct the same config once and re-use it elsewhere. It might also help should #7500 require any extra setup, so we can easily add it to all the appropriate call sites. --- proxy/src/auth/flow.rs | 2 +- proxy/src/bin/local_proxy.rs | 16 +- proxy/src/bin/pg_sni_router.rs | 2 +- proxy/src/bin/proxy.rs | 17 +- proxy/src/cancellation.rs | 77 +++-- proxy/src/compute.rs | 41 +-- proxy/src/config.rs | 288 +----------------- proxy/src/console_redirect_proxy.rs | 4 +- proxy/src/control_plane/mod.rs | 6 +- proxy/src/lib.rs | 2 +- proxy/src/proxy/connect_compute.rs | 28 +- proxy/src/proxy/handshake.rs | 3 +- proxy/src/proxy/mod.rs | 4 +- proxy/src/proxy/passthrough.rs | 13 +- proxy/src/proxy/tests/mod.rs | 91 +++--- proxy/src/redis/notifications.rs | 3 + proxy/src/scram/exchange.rs | 13 +- proxy/src/scram/mod.rs | 7 +- proxy/src/serverless/backend.rs | 14 +- proxy/src/serverless/websocket.rs | 2 +- proxy/src/stream.rs | 2 +- proxy/src/tls/client_config.rs | 42 +++ proxy/src/tls/mod.rs | 72 +++++ .../mod.rs => tls/postgres_rustls.rs} | 10 +- proxy/src/tls/server_config.rs | 218 +++++++++++++ 25 files changed, 509 insertions(+), 468 deletions(-) create mode 100644 proxy/src/tls/client_config.rs create mode 100644 proxy/src/tls/mod.rs rename proxy/src/{postgres_rustls/mod.rs => tls/postgres_rustls.rs} (96%) create mode 100644 proxy/src/tls/server_config.rs diff --git a/proxy/src/auth/flow.rs b/proxy/src/auth/flow.rs index 60d1962d7f..0992c6d875 100644 --- a/proxy/src/auth/flow.rs +++ b/proxy/src/auth/flow.rs @@ -10,7 +10,6 @@ use tracing::info; use super::backend::ComputeCredentialKeys; use super::{AuthError, PasswordHackPayload}; -use crate::config::TlsServerEndPoint; use crate::context::RequestContext; use crate::control_plane::AuthSecret; use crate::intern::EndpointIdInt; @@ -18,6 +17,7 @@ use crate::sasl; use crate::scram::threadpool::ThreadPool; use crate::scram::{self}; use crate::stream::{PqStream, Stream}; +use crate::tls::TlsServerEndPoint; /// Every authentication selector is supposed to implement this trait. 
pub(crate) trait AuthMethod { diff --git a/proxy/src/bin/local_proxy.rs b/proxy/src/bin/local_proxy.rs index 56bbd94850..644f670f88 100644 --- a/proxy/src/bin/local_proxy.rs +++ b/proxy/src/bin/local_proxy.rs @@ -13,7 +13,9 @@ use proxy::auth::backend::jwt::JwkCache; use proxy::auth::backend::local::{LocalBackend, JWKS_ROLE_MAP}; use proxy::auth::{self}; use proxy::cancellation::CancellationHandlerMain; -use proxy::config::{self, AuthenticationConfig, HttpConfig, ProxyConfig, RetryConfig}; +use proxy::config::{ + self, AuthenticationConfig, ComputeConfig, HttpConfig, ProxyConfig, RetryConfig, +}; use proxy::control_plane::locks::ApiLocks; use proxy::control_plane::messages::{EndpointJwksResponse, JwksSettings}; use proxy::http::health_server::AppMetrics; @@ -25,6 +27,7 @@ use proxy::rate_limiter::{ use proxy::scram::threadpool::ThreadPool; use proxy::serverless::cancel_set::CancelSet; use proxy::serverless::{self, GlobalConnPoolOptions}; +use proxy::tls::client_config::compute_client_config_with_root_certs; use proxy::types::RoleName; use proxy::url::ApiUrl; @@ -209,6 +212,7 @@ async fn main() -> anyhow::Result<()> { http_listener, shutdown.clone(), Arc::new(CancellationHandlerMain::new( + &config.connect_to_compute, Arc::new(DashMap::new()), None, proxy::metrics::CancellationSource::Local, @@ -268,6 +272,12 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig max_response_size_bytes: args.sql_over_http.sql_over_http_max_response_size_bytes, }; + let compute_config = ComputeConfig { + retry: RetryConfig::parse(RetryConfig::CONNECT_TO_COMPUTE_DEFAULT_VALUES)?, + tls: Arc::new(compute_client_config_with_root_certs()?), + timeout: Duration::from_secs(2), + }; + Ok(Box::leak(Box::new(ProxyConfig { tls_config: None, metric_collection: None, @@ -289,9 +299,7 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig region: "local".into(), wake_compute_retry_config: RetryConfig::parse(RetryConfig::WAKE_COMPUTE_DEFAULT_VALUES)?, connect_compute_locks, - connect_to_compute_retry_config: RetryConfig::parse( - RetryConfig::CONNECT_TO_COMPUTE_DEFAULT_VALUES, - )?, + connect_to_compute: compute_config, }))) } diff --git a/proxy/src/bin/pg_sni_router.rs b/proxy/src/bin/pg_sni_router.rs index 9538384b9e..97d870a83a 100644 --- a/proxy/src/bin/pg_sni_router.rs +++ b/proxy/src/bin/pg_sni_router.rs @@ -10,12 +10,12 @@ use clap::Arg; use futures::future::Either; use futures::TryFutureExt; use itertools::Itertools; -use proxy::config::TlsServerEndPoint; use proxy::context::RequestContext; use proxy::metrics::{Metrics, ThreadPoolMetrics}; use proxy::protocol2::ConnectionInfo; use proxy::proxy::{copy_bidirectional_client_compute, run_until_cancelled, ErrorSource}; use proxy::stream::{PqStream, Stream}; +use proxy::tls::TlsServerEndPoint; use rustls::crypto::ring; use rustls::pki_types::PrivateKeyDer; use tokio::io::{AsyncRead, AsyncWrite}; diff --git a/proxy/src/bin/proxy.rs b/proxy/src/bin/proxy.rs index 3dcf9ca060..3b122d771c 100644 --- a/proxy/src/bin/proxy.rs +++ b/proxy/src/bin/proxy.rs @@ -1,6 +1,7 @@ use std::net::SocketAddr; use std::pin::pin; use std::sync::Arc; +use std::time::Duration; use anyhow::bail; use futures::future::Either; @@ -8,7 +9,7 @@ use proxy::auth::backend::jwt::JwkCache; use proxy::auth::backend::{AuthRateLimiter, ConsoleRedirectBackend, MaybeOwned}; use proxy::cancellation::{CancelMap, CancellationHandler}; use proxy::config::{ - self, remote_storage_from_toml, AuthenticationConfig, CacheOptions, HttpConfig, + self, 
remote_storage_from_toml, AuthenticationConfig, CacheOptions, ComputeConfig, HttpConfig, ProjectInfoCacheOptions, ProxyConfig, ProxyProtocolV2, }; use proxy::context::parquet::ParquetUploadArgs; @@ -23,6 +24,7 @@ use proxy::redis::{elasticache, notifications}; use proxy::scram::threadpool::ThreadPool; use proxy::serverless::cancel_set::CancelSet; use proxy::serverless::GlobalConnPoolOptions; +use proxy::tls::client_config::compute_client_config_with_root_certs; use proxy::{auth, control_plane, http, serverless, usage_metrics}; use remote_storage::RemoteStorageConfig; use tokio::net::TcpListener; @@ -397,6 +399,7 @@ async fn main() -> anyhow::Result<()> { let cancellation_handler = Arc::new(CancellationHandler::< Option>>, >::new( + &config.connect_to_compute, cancel_map.clone(), redis_publisher, proxy::metrics::CancellationSource::FromClient, @@ -492,6 +495,7 @@ async fn main() -> anyhow::Result<()> { let cache = api.caches.project_info.clone(); if let Some(client) = client1 { maintenance_tasks.spawn(notifications::task_main( + config, client, cache.clone(), cancel_map.clone(), @@ -500,6 +504,7 @@ async fn main() -> anyhow::Result<()> { } if let Some(client) = client2 { maintenance_tasks.spawn(notifications::task_main( + config, client, cache.clone(), cancel_map.clone(), @@ -632,6 +637,12 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> { console_redirect_confirmation_timeout: args.webauth_confirmation_timeout, }; + let compute_config = ComputeConfig { + retry: config::RetryConfig::parse(&args.connect_to_compute_retry)?, + tls: Arc::new(compute_client_config_with_root_certs()?), + timeout: Duration::from_secs(2), + }; + let config = ProxyConfig { tls_config, metric_collection, @@ -642,9 +653,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> { region: args.region.clone(), wake_compute_retry_config: config::RetryConfig::parse(&args.wake_compute_retry)?, connect_compute_locks, - connect_to_compute_retry_config: config::RetryConfig::parse( - &args.connect_to_compute_retry, - )?, + connect_to_compute: compute_config, }; let config = Box::leak(Box::new(config)); diff --git a/proxy/src/cancellation.rs b/proxy/src/cancellation.rs index ebaea173ae..df618cf242 100644 --- a/proxy/src/cancellation.rs +++ b/proxy/src/cancellation.rs @@ -3,11 +3,9 @@ use std::sync::Arc; use dashmap::DashMap; use ipnet::{IpNet, Ipv4Net, Ipv6Net}; -use once_cell::sync::OnceCell; use postgres_client::tls::MakeTlsConnect; use postgres_client::CancelToken; use pq_proto::CancelKeyData; -use rustls::crypto::ring; use thiserror::Error; use tokio::net::TcpStream; use tokio::sync::Mutex; @@ -15,15 +13,15 @@ use tracing::{debug, info}; use uuid::Uuid; use crate::auth::{check_peer_addr_is_in_list, IpPattern}; -use crate::compute::load_certs; +use crate::config::ComputeConfig; use crate::error::ReportableError; use crate::ext::LockExt; use crate::metrics::{CancellationRequest, CancellationSource, Metrics}; -use crate::postgres_rustls::MakeRustlsConnect; use crate::rate_limiter::LeakyBucketRateLimiter; use crate::redis::cancellation_publisher::{ CancellationPublisher, CancellationPublisherMut, RedisPublisherClient, }; +use crate::tls::postgres_rustls::MakeRustlsConnect; pub type CancelMap = Arc>>; pub type CancellationHandlerMain = CancellationHandler>>>; @@ -35,6 +33,7 @@ type IpSubnetKey = IpNet; /// /// If `CancellationPublisher` is available, cancel request will be used to publish the cancellation key to other proxy instances. pub struct CancellationHandler
<P>
{ + compute_config: &'static ComputeConfig, map: CancelMap, client: P, /// This field used for the monitoring purposes. @@ -183,7 +182,7 @@ impl CancellationHandler
<P>
{ "cancelling query per user's request using key {key}, hostname {}, address: {}", cancel_closure.hostname, cancel_closure.socket_addr ); - cancel_closure.try_cancel_query().await + cancel_closure.try_cancel_query(self.compute_config).await } #[cfg(test)] @@ -198,8 +197,13 @@ impl CancellationHandler
<P>
{ } impl CancellationHandler<()> { - pub fn new(map: CancelMap, from: CancellationSource) -> Self { + pub fn new( + compute_config: &'static ComputeConfig, + map: CancelMap, + from: CancellationSource, + ) -> Self { Self { + compute_config, map, client: (), from, @@ -214,8 +218,14 @@ impl CancellationHandler<()> { } impl CancellationHandler>>> { - pub fn new(map: CancelMap, client: Option>>, from: CancellationSource) -> Self { + pub fn new( + compute_config: &'static ComputeConfig, + map: CancelMap, + client: Option>>, + from: CancellationSource, + ) -> Self { Self { + compute_config, map, client, from, @@ -229,8 +239,6 @@ impl CancellationHandler>>> { } } -static TLS_ROOTS: OnceCell> = OnceCell::new(); - /// This should've been a [`std::future::Future`], but /// it's impossible to name a type of an unboxed future /// (we'd need something like `#![feature(type_alias_impl_trait)]`). @@ -257,27 +265,14 @@ impl CancelClosure { } } /// Cancels the query running on user's compute node. - pub(crate) async fn try_cancel_query(self) -> Result<(), CancelError> { + pub(crate) async fn try_cancel_query( + self, + compute_config: &ComputeConfig, + ) -> Result<(), CancelError> { let socket = TcpStream::connect(self.socket_addr).await?; - let root_store = TLS_ROOTS - .get_or_try_init(load_certs) - .map_err(|_e| { - CancelError::IO(std::io::Error::new( - std::io::ErrorKind::Other, - "TLS root store initialization failed".to_string(), - )) - })? - .clone(); - - let client_config = - rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider())) - .with_safe_default_protocol_versions() - .expect("ring should support the default protocol versions") - .with_root_certificates(root_store) - .with_no_client_auth(); - - let mut mk_tls = crate::postgres_rustls::MakeRustlsConnect::new(client_config); + let mut mk_tls = + crate::tls::postgres_rustls::MakeRustlsConnect::new(compute_config.tls.clone()); let tls = >::make_tls_connect( &mut mk_tls, &self.hostname, @@ -329,11 +324,30 @@ impl
<P>
Drop for Session
<P>
{ #[cfg(test)] #[expect(clippy::unwrap_used)] mod tests { + use std::time::Duration; + use super::*; + use crate::config::RetryConfig; + use crate::tls::client_config::compute_client_config_with_certs; + + fn config() -> ComputeConfig { + let retry = RetryConfig { + base_delay: Duration::from_secs(1), + max_retries: 5, + backoff_factor: 2.0, + }; + + ComputeConfig { + retry, + tls: Arc::new(compute_client_config_with_certs(std::iter::empty())), + timeout: Duration::from_secs(2), + } + } #[tokio::test] async fn check_session_drop() -> anyhow::Result<()> { let cancellation_handler = Arc::new(CancellationHandler::<()>::new( + Box::leak(Box::new(config())), CancelMap::default(), CancellationSource::FromRedis, )); @@ -349,8 +363,11 @@ mod tests { #[tokio::test] async fn cancel_session_noop_regression() { - let handler = - CancellationHandler::<()>::new(CancelMap::default(), CancellationSource::Local); + let handler = CancellationHandler::<()>::new( + Box::leak(Box::new(config())), + CancelMap::default(), + CancellationSource::Local, + ); handler .cancel_session( CancelKeyData { diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index 8dc9b59e81..d60dfd0f80 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -1,16 +1,13 @@ use std::io; use std::net::SocketAddr; -use std::sync::Arc; use std::time::Duration; use futures::{FutureExt, TryFutureExt}; use itertools::Itertools; -use once_cell::sync::OnceCell; use postgres_client::tls::MakeTlsConnect; use postgres_client::{CancelToken, RawConnection}; use postgres_protocol::message::backend::NoticeResponseBody; use pq_proto::StartupMessageParams; -use rustls::crypto::ring; use rustls::pki_types::InvalidDnsNameError; use thiserror::Error; use tokio::net::TcpStream; @@ -18,14 +15,15 @@ use tracing::{debug, error, info, warn}; use crate::auth::parse_endpoint_param; use crate::cancellation::CancelClosure; +use crate::config::ComputeConfig; use crate::context::RequestContext; use crate::control_plane::client::ApiLockError; use crate::control_plane::errors::WakeComputeError; use crate::control_plane::messages::MetricsAuxInfo; use crate::error::{ReportableError, UserFacingError}; use crate::metrics::{Metrics, NumDbConnectionsGuard}; -use crate::postgres_rustls::MakeRustlsConnect; use crate::proxy::neon_option; +use crate::tls::postgres_rustls::MakeRustlsConnect; use crate::types::Host; pub const COULD_NOT_CONNECT: &str = "Couldn't connect to compute node"; @@ -40,9 +38,6 @@ pub(crate) enum ConnectionError { #[error("{COULD_NOT_CONNECT}: {0}")] CouldNotConnect(#[from] io::Error), - #[error("Couldn't load native TLS certificates: {0:?}")] - TlsCertificateError(Vec), - #[error("{COULD_NOT_CONNECT}: {0}")] TlsError(#[from] InvalidDnsNameError), @@ -89,7 +84,6 @@ impl ReportableError for ConnectionError { } ConnectionError::Postgres(_) => crate::error::ErrorKind::Compute, ConnectionError::CouldNotConnect(_) => crate::error::ErrorKind::Compute, - ConnectionError::TlsCertificateError(_) => crate::error::ErrorKind::Service, ConnectionError::TlsError(_) => crate::error::ErrorKind::Compute, ConnectionError::WakeComputeError(e) => e.get_error_kind(), ConnectionError::TooManyConnectionAttempts(e) => e.get_error_kind(), @@ -251,25 +245,13 @@ impl ConnCfg { &self, ctx: &RequestContext, aux: MetricsAuxInfo, - timeout: Duration, + config: &ComputeConfig, ) -> Result { let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute); - let (socket_addr, stream, host) = self.connect_raw(timeout).await?; + let (socket_addr, stream, host) = 
self.connect_raw(config.timeout).await?; drop(pause); - let root_store = TLS_ROOTS - .get_or_try_init(load_certs) - .map_err(ConnectionError::TlsCertificateError)? - .clone(); - - let client_config = - rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider())) - .with_safe_default_protocol_versions() - .expect("ring should support the default protocol versions") - .with_root_certificates(root_store) - .with_no_client_auth(); - - let mut mk_tls = crate::postgres_rustls::MakeRustlsConnect::new(client_config); + let mut mk_tls = crate::tls::postgres_rustls::MakeRustlsConnect::new(config.tls.clone()); let tls = >::make_tls_connect( &mut mk_tls, host, @@ -341,19 +323,6 @@ fn filtered_options(options: &str) -> Option { Some(options) } -pub(crate) fn load_certs() -> Result, Vec> { - let der_certs = rustls_native_certs::load_native_certs(); - - if !der_certs.errors.is_empty() { - return Err(der_certs.errors); - } - - let mut store = rustls::RootCertStore::empty(); - store.add_parsable_certificates(der_certs.certs); - Ok(Arc::new(store)) -} -static TLS_ROOTS: OnceCell> = OnceCell::new(); - #[cfg(test)] mod tests { use super::*; diff --git a/proxy/src/config.rs b/proxy/src/config.rs index 33d1d2e9e4..8502edcfab 100644 --- a/proxy/src/config.rs +++ b/proxy/src/config.rs @@ -1,17 +1,10 @@ -use std::collections::{HashMap, HashSet}; use std::str::FromStr; use std::sync::Arc; use std::time::Duration; use anyhow::{bail, ensure, Context, Ok}; use clap::ValueEnum; -use itertools::Itertools; use remote_storage::RemoteStorageConfig; -use rustls::crypto::ring::{self, sign}; -use rustls::pki_types::{CertificateDer, PrivateKeyDer}; -use sha2::{Digest, Sha256}; -use tracing::{error, info}; -use x509_parser::oid_registry; use crate::auth::backend::jwt::JwkCache; use crate::auth::backend::AuthRateLimiter; @@ -20,6 +13,7 @@ use crate::rate_limiter::{RateBucketInfo, RateLimitAlgorithm, RateLimiterConfig} use crate::scram::threadpool::ThreadPool; use crate::serverless::cancel_set::CancelSet; use crate::serverless::GlobalConnPoolOptions; +pub use crate::tls::server_config::{configure_tls, TlsConfig}; use crate::types::Host; pub struct ProxyConfig { @@ -32,7 +26,13 @@ pub struct ProxyConfig { pub handshake_timeout: Duration, pub wake_compute_retry_config: RetryConfig, pub connect_compute_locks: ApiLocks, - pub connect_to_compute_retry_config: RetryConfig, + pub connect_to_compute: ComputeConfig, +} + +pub struct ComputeConfig { + pub retry: RetryConfig, + pub tls: Arc, + pub timeout: Duration, } #[derive(Copy, Clone, Debug, ValueEnum, PartialEq)] @@ -52,12 +52,6 @@ pub struct MetricCollectionConfig { pub backup_metric_collection_config: MetricBackupCollectionConfig, } -pub struct TlsConfig { - pub config: Arc, - pub common_names: HashSet, - pub cert_resolver: Arc, -} - pub struct HttpConfig { pub accept_websockets: bool, pub pool_options: GlobalConnPoolOptions, @@ -80,272 +74,6 @@ pub struct AuthenticationConfig { pub console_redirect_confirmation_timeout: tokio::time::Duration, } -impl TlsConfig { - pub fn to_server_config(&self) -> Arc { - self.config.clone() - } -} - -/// -pub const PG_ALPN_PROTOCOL: &[u8] = b"postgresql"; - -/// Configure TLS for the main endpoint. 
-pub fn configure_tls( - key_path: &str, - cert_path: &str, - certs_dir: Option<&String>, - allow_tls_keylogfile: bool, -) -> anyhow::Result { - let mut cert_resolver = CertResolver::new(); - - // add default certificate - cert_resolver.add_cert_path(key_path, cert_path, true)?; - - // add extra certificates - if let Some(certs_dir) = certs_dir { - for entry in std::fs::read_dir(certs_dir)? { - let entry = entry?; - let path = entry.path(); - if path.is_dir() { - // file names aligned with default cert-manager names - let key_path = path.join("tls.key"); - let cert_path = path.join("tls.crt"); - if key_path.exists() && cert_path.exists() { - cert_resolver.add_cert_path( - &key_path.to_string_lossy(), - &cert_path.to_string_lossy(), - false, - )?; - } - } - } - } - - let common_names = cert_resolver.get_common_names(); - - let cert_resolver = Arc::new(cert_resolver); - - // allow TLS 1.2 to be compatible with older client libraries - let mut config = - rustls::ServerConfig::builder_with_provider(Arc::new(ring::default_provider())) - .with_protocol_versions(&[&rustls::version::TLS13, &rustls::version::TLS12]) - .context("ring should support TLS1.2 and TLS1.3")? - .with_no_client_auth() - .with_cert_resolver(cert_resolver.clone()); - - config.alpn_protocols = vec![PG_ALPN_PROTOCOL.to_vec()]; - - if allow_tls_keylogfile { - // KeyLogFile will check for the SSLKEYLOGFILE environment variable. - config.key_log = Arc::new(rustls::KeyLogFile::new()); - } - - Ok(TlsConfig { - config: Arc::new(config), - common_names, - cert_resolver, - }) -} - -/// Channel binding parameter -/// -/// -/// Description: The hash of the TLS server's certificate as it -/// appears, octet for octet, in the server's Certificate message. Note -/// that the Certificate message contains a certificate_list, in which -/// the first element is the server's certificate. -/// -/// The hash function is to be selected as follows: -/// -/// * if the certificate's signatureAlgorithm uses a single hash -/// function, and that hash function is either MD5 or SHA-1, then use SHA-256; -/// -/// * if the certificate's signatureAlgorithm uses a single hash -/// function and that hash function neither MD5 nor SHA-1, then use -/// the hash function associated with the certificate's -/// signatureAlgorithm; -/// -/// * if the certificate's signatureAlgorithm uses no hash functions or -/// uses multiple hash functions, then this channel binding type's -/// channel bindings are undefined at this time (updates to is channel -/// binding type may occur to address this issue if it ever arises). -#[derive(Debug, Clone, Copy)] -pub enum TlsServerEndPoint { - Sha256([u8; 32]), - Undefined, -} - -impl TlsServerEndPoint { - pub fn new(cert: &CertificateDer<'_>) -> anyhow::Result { - let sha256_oids = [ - // I'm explicitly not adding MD5 or SHA1 here... They're bad. - oid_registry::OID_SIG_ECDSA_WITH_SHA256, - oid_registry::OID_PKCS1_SHA256WITHRSA, - ]; - - let pem = x509_parser::parse_x509_certificate(cert) - .context("Failed to parse PEM object from cerficiate")? 
- .1; - - info!(subject = %pem.subject, "parsing TLS certificate"); - - let reg = oid_registry::OidRegistry::default().with_all_crypto(); - let oid = pem.signature_algorithm.oid(); - let alg = reg.get(oid); - if sha256_oids.contains(oid) { - let tls_server_end_point: [u8; 32] = Sha256::new().chain_update(cert).finalize().into(); - info!(subject = %pem.subject, signature_algorithm = alg.map(|a| a.description()), tls_server_end_point = %base64::encode(tls_server_end_point), "determined channel binding"); - Ok(Self::Sha256(tls_server_end_point)) - } else { - error!(subject = %pem.subject, signature_algorithm = alg.map(|a| a.description()), "unknown channel binding"); - Ok(Self::Undefined) - } - } - - pub fn supported(&self) -> bool { - !matches!(self, TlsServerEndPoint::Undefined) - } -} - -#[derive(Default, Debug)] -pub struct CertResolver { - certs: HashMap, TlsServerEndPoint)>, - default: Option<(Arc, TlsServerEndPoint)>, -} - -impl CertResolver { - pub fn new() -> Self { - Self::default() - } - - fn add_cert_path( - &mut self, - key_path: &str, - cert_path: &str, - is_default: bool, - ) -> anyhow::Result<()> { - let priv_key = { - let key_bytes = std::fs::read(key_path) - .with_context(|| format!("Failed to read TLS keys at '{key_path}'"))?; - rustls_pemfile::private_key(&mut &key_bytes[..]) - .with_context(|| format!("Failed to parse TLS keys at '{key_path}'"))? - .with_context(|| format!("Failed to parse TLS keys at '{key_path}'"))? - }; - - let cert_chain_bytes = std::fs::read(cert_path) - .context(format!("Failed to read TLS cert file at '{cert_path}.'"))?; - - let cert_chain = { - rustls_pemfile::certs(&mut &cert_chain_bytes[..]) - .try_collect() - .with_context(|| { - format!("Failed to read TLS certificate chain from bytes from file at '{cert_path}'.") - })? - }; - - self.add_cert(priv_key, cert_chain, is_default) - } - - pub fn add_cert( - &mut self, - priv_key: PrivateKeyDer<'static>, - cert_chain: Vec>, - is_default: bool, - ) -> anyhow::Result<()> { - let key = sign::any_supported_type(&priv_key).context("invalid private key")?; - - let first_cert = &cert_chain[0]; - let tls_server_end_point = TlsServerEndPoint::new(first_cert)?; - let pem = x509_parser::parse_x509_certificate(first_cert) - .context("Failed to parse PEM object from cerficiate")? - .1; - - let common_name = pem.subject().to_string(); - - // We need to get the canonical name for this certificate so we can match them against any domain names - // seen within the proxy codebase. - // - // In scram-proxy we use wildcard certificates only, with the database endpoint as the wildcard subdomain, taken from SNI. - // We need to remove the wildcard prefix for the purposes of certificate selection. - // - // auth-broker does not use SNI and instead uses the Neon-Connection-String header. - // Auth broker has the subdomain `apiauth` we need to remove for the purposes of validating the Neon-Connection-String. 
- // - // Console Redirect proxy does not use any wildcard domains and does not need any certificate selection or conn string - // validation, so let's we can continue with any common-name - let common_name = if let Some(s) = common_name.strip_prefix("CN=*.") { - s.to_string() - } else if let Some(s) = common_name.strip_prefix("CN=apiauth.") { - s.to_string() - } else if let Some(s) = common_name.strip_prefix("CN=") { - s.to_string() - } else { - bail!("Failed to parse common name from certificate") - }; - - let cert = Arc::new(rustls::sign::CertifiedKey::new(cert_chain, key)); - - if is_default { - self.default = Some((cert.clone(), tls_server_end_point)); - } - - self.certs.insert(common_name, (cert, tls_server_end_point)); - - Ok(()) - } - - pub fn get_common_names(&self) -> HashSet { - self.certs.keys().map(|s| s.to_string()).collect() - } -} - -impl rustls::server::ResolvesServerCert for CertResolver { - fn resolve( - &self, - client_hello: rustls::server::ClientHello<'_>, - ) -> Option> { - self.resolve(client_hello.server_name()).map(|x| x.0) - } -} - -impl CertResolver { - pub fn resolve( - &self, - server_name: Option<&str>, - ) -> Option<(Arc, TlsServerEndPoint)> { - // loop here and cut off more and more subdomains until we find - // a match to get a proper wildcard support. OTOH, we now do not - // use nested domains, so keep this simple for now. - // - // With the current coding foo.com will match *.foo.com and that - // repeats behavior of the old code. - if let Some(mut sni_name) = server_name { - loop { - if let Some(cert) = self.certs.get(sni_name) { - return Some(cert.clone()); - } - if let Some((_, rest)) = sni_name.split_once('.') { - sni_name = rest; - } else { - return None; - } - } - } else { - // No SNI, use the default certificate, otherwise we can't get to - // options parameter which can be used to set endpoint name too. - // That means that non-SNI flow will not work for CNAME domains in - // verify-full mode. - // - // If that will be a problem we can: - // - // a) Instead of multi-cert approach use single cert with extra - // domains listed in Subject Alternative Name (SAN). - // b) Deploy separate proxy instances for extra domains. - self.default.clone() - } - } -} - #[derive(Debug)] pub struct EndpointCacheConfig { /// Batch size to receive all endpoints on the startup. 
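The `ComputeConfig` introduced above groups the retry policy, the shared compute TLS client config, and the connect timeout that previously lived in a bare `RetryConfig`, a hard-coded `CONNECT_TIMEOUT`, and lazily initialised `TLS_ROOTS` cells. A minimal sketch of how the proxy binaries now construct it, using only items that appear in this patch (the free helper function name is illustrative):

```rust
use std::sync::Arc;
use std::time::Duration;

use proxy::config::{ComputeConfig, RetryConfig};
use proxy::tls::client_config::compute_client_config_with_root_certs;

// Sketch of what build_config() now does in proxy.rs / local_proxy.rs:
// parse the retry policy, load the native root certificates once, and keep
// the same 2-second connect timeout that used to be a module-level constant.
fn connect_to_compute_config() -> anyhow::Result<ComputeConfig> {
    Ok(ComputeConfig {
        retry: RetryConfig::parse(RetryConfig::CONNECT_TO_COMPUTE_DEFAULT_VALUES)?,
        tls: Arc::new(compute_client_config_with_root_certs()?),
        timeout: Duration::from_secs(2),
    })
}
```

Because the rustls `ClientConfig` now sits behind an `Arc` inside the config, compute connections and cancel queries reuse one root store loaded at startup instead of each call site initialising its own `OnceCell`.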
diff --git a/proxy/src/console_redirect_proxy.rs b/proxy/src/console_redirect_proxy.rs index c477822e85..25a549039c 100644 --- a/proxy/src/console_redirect_proxy.rs +++ b/proxy/src/console_redirect_proxy.rs @@ -115,7 +115,7 @@ pub async fn task_main( Ok(Some(p)) => { ctx.set_success(); let _disconnect = ctx.log_connect(); - match p.proxy_pass().await { + match p.proxy_pass(&config.connect_to_compute).await { Ok(()) => {} Err(ErrorSource::Client(e)) => { error!(?session_id, "per-client task finished with an IO error from the client: {e:#}"); @@ -216,7 +216,7 @@ pub(crate) async fn handle_client( }, &user_info, config.wake_compute_retry_config, - config.connect_to_compute_retry_config, + &config.connect_to_compute, ) .or_else(|e| stream.throw_error(e)) .await?; diff --git a/proxy/src/control_plane/mod.rs b/proxy/src/control_plane/mod.rs index 0ca1a6aae0..c65041df0e 100644 --- a/proxy/src/control_plane/mod.rs +++ b/proxy/src/control_plane/mod.rs @@ -10,13 +10,13 @@ pub mod client; pub(crate) mod errors; use std::sync::Arc; -use std::time::Duration; use crate::auth::backend::jwt::AuthRule; use crate::auth::backend::{ComputeCredentialKeys, ComputeUserInfo}; use crate::auth::IpPattern; use crate::cache::project_info::ProjectInfoCacheImpl; use crate::cache::{Cached, TimedLru}; +use crate::config::ComputeConfig; use crate::context::RequestContext; use crate::control_plane::messages::{ControlPlaneErrorMessage, MetricsAuxInfo}; use crate::intern::ProjectIdInt; @@ -73,9 +73,9 @@ impl NodeInfo { pub(crate) async fn connect( &self, ctx: &RequestContext, - timeout: Duration, + config: &ComputeConfig, ) -> Result { - self.config.connect(ctx, self.aux.clone(), timeout).await + self.config.connect(ctx, self.aux.clone(), config).await } pub(crate) fn reuse_settings(&mut self, other: Self) { diff --git a/proxy/src/lib.rs b/proxy/src/lib.rs index a5a72f26d9..c56474edd7 100644 --- a/proxy/src/lib.rs +++ b/proxy/src/lib.rs @@ -89,7 +89,6 @@ pub mod jemalloc; pub mod logging; pub mod metrics; pub mod parse; -pub mod postgres_rustls; pub mod protocol2; pub mod proxy; pub mod rate_limiter; @@ -99,6 +98,7 @@ pub mod scram; pub mod serverless; pub mod signals; pub mod stream; +pub mod tls; pub mod types; pub mod url; pub mod usage_metrics; diff --git a/proxy/src/proxy/connect_compute.rs b/proxy/src/proxy/connect_compute.rs index 4a30d23985..8a80494860 100644 --- a/proxy/src/proxy/connect_compute.rs +++ b/proxy/src/proxy/connect_compute.rs @@ -6,7 +6,7 @@ use tracing::{debug, info, warn}; use super::retry::ShouldRetryWakeCompute; use crate::auth::backend::ComputeCredentialKeys; use crate::compute::{self, PostgresConnection, COULD_NOT_CONNECT}; -use crate::config::RetryConfig; +use crate::config::{ComputeConfig, RetryConfig}; use crate::context::RequestContext; use crate::control_plane::errors::WakeComputeError; use crate::control_plane::locks::ApiLocks; @@ -19,8 +19,6 @@ use crate::proxy::retry::{retry_after, should_retry, CouldRetry}; use crate::proxy::wake_compute::wake_compute; use crate::types::Host; -const CONNECT_TIMEOUT: time::Duration = time::Duration::from_secs(2); - /// If we couldn't connect, a cached connection info might be to blame /// (e.g. the compute node's address might've changed at the wrong time). /// Invalidate the cache entry (if any) to prevent subsequent errors. 
@@ -49,7 +47,7 @@ pub(crate) trait ConnectMechanism { &self, ctx: &RequestContext, node_info: &control_plane::CachedNodeInfo, - timeout: time::Duration, + config: &ComputeConfig, ) -> Result; fn update_connect_config(&self, conf: &mut compute::ConnCfg); @@ -86,11 +84,11 @@ impl ConnectMechanism for TcpMechanism<'_> { &self, ctx: &RequestContext, node_info: &control_plane::CachedNodeInfo, - timeout: time::Duration, + config: &ComputeConfig, ) -> Result { let host = node_info.config.get_host(); let permit = self.locks.get_permit(&host).await?; - permit.release_result(node_info.connect(ctx, timeout).await) + permit.release_result(node_info.connect(ctx, config).await) } fn update_connect_config(&self, config: &mut compute::ConnCfg) { @@ -105,7 +103,7 @@ pub(crate) async fn connect_to_compute Result where M::ConnectError: CouldRetry + ShouldRetryWakeCompute + std::fmt::Debug, @@ -119,10 +117,7 @@ where mechanism.update_connect_config(&mut node_info.config); // try once - let err = match mechanism - .connect_once(ctx, &node_info, CONNECT_TIMEOUT) - .await - { + let err = match mechanism.connect_once(ctx, &node_info, compute).await { Ok(res) => { ctx.success(); Metrics::get().proxy.retries_metric.observe( @@ -142,7 +137,7 @@ where let node_info = if !node_info.cached() || !err.should_retry_wake_compute() { // If we just recieved this from cplane and didn't get it from cache, we shouldn't retry. // Do not need to retrieve a new node_info, just return the old one. - if should_retry(&err, num_retries, connect_to_compute_retry_config) { + if should_retry(&err, num_retries, compute.retry) { Metrics::get().proxy.retries_metric.observe( RetriesMetricGroup { outcome: ConnectOutcome::Failed, @@ -172,10 +167,7 @@ where debug!("wake_compute success. attempting to connect"); num_retries = 1; loop { - match mechanism - .connect_once(ctx, &node_info, CONNECT_TIMEOUT) - .await - { + match mechanism.connect_once(ctx, &node_info, compute).await { Ok(res) => { ctx.success(); Metrics::get().proxy.retries_metric.observe( @@ -190,7 +182,7 @@ where return Ok(res); } Err(e) => { - if !should_retry(&e, num_retries, connect_to_compute_retry_config) { + if !should_retry(&e, num_retries, compute.retry) { // Don't log an error here, caller will print the error Metrics::get().proxy.retries_metric.observe( RetriesMetricGroup { @@ -206,7 +198,7 @@ where } }; - let wait_duration = retry_after(num_retries, connect_to_compute_retry_config); + let wait_duration = retry_after(num_retries, compute.retry); num_retries += 1; let pause = ctx.latency_timer_pause(crate::metrics::Waiting::RetryTimeout); diff --git a/proxy/src/proxy/handshake.rs b/proxy/src/proxy/handshake.rs index e27c211932..955f754497 100644 --- a/proxy/src/proxy/handshake.rs +++ b/proxy/src/proxy/handshake.rs @@ -8,12 +8,13 @@ use tokio::io::{AsyncRead, AsyncWrite}; use tracing::{debug, info, warn}; use crate::auth::endpoint_sni; -use crate::config::{TlsConfig, PG_ALPN_PROTOCOL}; +use crate::config::TlsConfig; use crate::context::RequestContext; use crate::error::ReportableError; use crate::metrics::Metrics; use crate::proxy::ERR_INSECURE_CONNECTION; use crate::stream::{PqStream, Stream, StreamUpgradeError}; +use crate::tls::PG_ALPN_PROTOCOL; #[derive(Error, Debug)] pub(crate) enum HandshakeError { diff --git a/proxy/src/proxy/mod.rs b/proxy/src/proxy/mod.rs index dbe174cab7..3926c56fec 100644 --- a/proxy/src/proxy/mod.rs +++ b/proxy/src/proxy/mod.rs @@ -152,7 +152,7 @@ pub async fn task_main( Ok(Some(p)) => { ctx.set_success(); let _disconnect = ctx.log_connect(); - 
match p.proxy_pass().await { + match p.proxy_pass(&config.connect_to_compute).await { Ok(()) => {} Err(ErrorSource::Client(e)) => { warn!(?session_id, "per-client task finished with an IO error from the client: {e:#}"); @@ -351,7 +351,7 @@ pub(crate) async fn handle_client( }, &user_info, config.wake_compute_retry_config, - config.connect_to_compute_retry_config, + &config.connect_to_compute, ) .or_else(|e| stream.throw_error(e)) .await?; diff --git a/proxy/src/proxy/passthrough.rs b/proxy/src/proxy/passthrough.rs index dcaa81e5cd..a42f9aad39 100644 --- a/proxy/src/proxy/passthrough.rs +++ b/proxy/src/proxy/passthrough.rs @@ -5,6 +5,7 @@ use utils::measured_stream::MeasuredStream; use super::copy_bidirectional::ErrorSource; use crate::cancellation; use crate::compute::PostgresConnection; +use crate::config::ComputeConfig; use crate::control_plane::messages::MetricsAuxInfo; use crate::metrics::{Direction, Metrics, NumClientConnectionsGuard, NumConnectionRequestsGuard}; use crate::stream::Stream; @@ -67,9 +68,17 @@ pub(crate) struct ProxyPassthrough { } impl ProxyPassthrough { - pub(crate) async fn proxy_pass(self) -> Result<(), ErrorSource> { + pub(crate) async fn proxy_pass( + self, + compute_config: &ComputeConfig, + ) -> Result<(), ErrorSource> { let res = proxy_pass(self.client, self.compute.stream, self.aux).await; - if let Err(err) = self.compute.cancel_closure.try_cancel_query().await { + if let Err(err) = self + .compute + .cancel_closure + .try_cancel_query(compute_config) + .await + { tracing::warn!(session_id = ?self.session_id, ?err, "could not cancel the query in the database"); } res diff --git a/proxy/src/proxy/tests/mod.rs b/proxy/src/proxy/tests/mod.rs index 95c518fed9..10db2bcb30 100644 --- a/proxy/src/proxy/tests/mod.rs +++ b/proxy/src/proxy/tests/mod.rs @@ -22,14 +22,16 @@ use super::*; use crate::auth::backend::{ ComputeCredentialKeys, ComputeCredentials, ComputeUserInfo, MaybeOwned, }; -use crate::config::{CertResolver, RetryConfig}; +use crate::config::{ComputeConfig, RetryConfig}; use crate::control_plane::client::{ControlPlaneClient, TestControlPlaneClient}; use crate::control_plane::messages::{ControlPlaneErrorMessage, Details, MetricsAuxInfo, Status}; use crate::control_plane::{ self, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret, NodeInfo, NodeInfoCache, }; use crate::error::ErrorKind; -use crate::postgres_rustls::MakeRustlsConnect; +use crate::tls::client_config::compute_client_config_with_certs; +use crate::tls::postgres_rustls::MakeRustlsConnect; +use crate::tls::server_config::CertResolver; use crate::types::{BranchId, EndpointId, ProjectId}; use crate::{sasl, scram}; @@ -67,7 +69,7 @@ fn generate_certs( } struct ClientConfig<'a> { - config: rustls::ClientConfig, + config: Arc, hostname: &'a str, } @@ -110,16 +112,7 @@ fn generate_tls_config<'a>( }; let client_config = { - let config = - rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider())) - .with_safe_default_protocol_versions() - .context("ring should support the default protocol versions")? 
- .with_root_certificates({ - let mut store = rustls::RootCertStore::empty(); - store.add(ca)?; - store - }) - .with_no_client_auth(); + let config = Arc::new(compute_client_config_with_certs([ca])); ClientConfig { config, hostname } }; @@ -468,7 +461,7 @@ impl ConnectMechanism for TestConnectMechanism { &self, _ctx: &RequestContext, _node_info: &control_plane::CachedNodeInfo, - _timeout: std::time::Duration, + _config: &ComputeConfig, ) -> Result { let mut counter = self.counter.lock().unwrap(); let action = self.sequence[*counter]; @@ -576,6 +569,20 @@ fn helper_create_connect_info( user_info } +fn config() -> ComputeConfig { + let retry = RetryConfig { + base_delay: Duration::from_secs(1), + max_retries: 5, + backoff_factor: 2.0, + }; + + ComputeConfig { + retry, + tls: Arc::new(compute_client_config_with_certs(std::iter::empty())), + timeout: Duration::from_secs(2), + } +} + #[tokio::test] async fn connect_to_compute_success() { let _ = env_logger::try_init(); @@ -583,12 +590,8 @@ async fn connect_to_compute_success() { let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![Wake, Connect]); let user_info = helper_create_connect_info(&mechanism); - let config = RetryConfig { - base_delay: Duration::from_secs(1), - max_retries: 5, - backoff_factor: 2.0, - }; - connect_to_compute(&ctx, &mechanism, &user_info, config, config) + let config = config(); + connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config) .await .unwrap(); mechanism.verify(); @@ -601,12 +604,8 @@ async fn connect_to_compute_retry() { let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Connect]); let user_info = helper_create_connect_info(&mechanism); - let config = RetryConfig { - base_delay: Duration::from_secs(1), - max_retries: 5, - backoff_factor: 2.0, - }; - connect_to_compute(&ctx, &mechanism, &user_info, config, config) + let config = config(); + connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config) .await .unwrap(); mechanism.verify(); @@ -620,12 +619,8 @@ async fn connect_to_compute_non_retry_1() { let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Fail]); let user_info = helper_create_connect_info(&mechanism); - let config = RetryConfig { - base_delay: Duration::from_secs(1), - max_retries: 5, - backoff_factor: 2.0, - }; - connect_to_compute(&ctx, &mechanism, &user_info, config, config) + let config = config(); + connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config) .await .unwrap_err(); mechanism.verify(); @@ -639,12 +634,8 @@ async fn connect_to_compute_non_retry_2() { let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![Wake, Fail, Wake, Connect]); let user_info = helper_create_connect_info(&mechanism); - let config = RetryConfig { - base_delay: Duration::from_secs(1), - max_retries: 5, - backoff_factor: 2.0, - }; - connect_to_compute(&ctx, &mechanism, &user_info, config, config) + let config = config(); + connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config) .await .unwrap(); mechanism.verify(); @@ -665,17 +656,13 @@ async fn connect_to_compute_non_retry_3() { max_retries: 1, backoff_factor: 2.0, }; - let connect_to_compute_retry_config = RetryConfig { - base_delay: Duration::from_secs(1), - max_retries: 5, - backoff_factor: 2.0, - }; + let config = config(); connect_to_compute( &ctx, &mechanism, &user_info, wake_compute_retry_config, - connect_to_compute_retry_config, + &config, ) .await 
.unwrap_err(); @@ -690,12 +677,8 @@ async fn wake_retry() { let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![WakeRetry, Wake, Connect]); let user_info = helper_create_connect_info(&mechanism); - let config = RetryConfig { - base_delay: Duration::from_secs(1), - max_retries: 5, - backoff_factor: 2.0, - }; - connect_to_compute(&ctx, &mechanism, &user_info, config, config) + let config = config(); + connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config) .await .unwrap(); mechanism.verify(); @@ -709,12 +692,8 @@ async fn wake_non_retry() { let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![WakeRetry, WakeFail]); let user_info = helper_create_connect_info(&mechanism); - let config = RetryConfig { - base_delay: Duration::from_secs(1), - max_retries: 5, - backoff_factor: 2.0, - }; - connect_to_compute(&ctx, &mechanism, &user_info, config, config) + let config = config(); + connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config) .await .unwrap_err(); mechanism.verify(); diff --git a/proxy/src/redis/notifications.rs b/proxy/src/redis/notifications.rs index d18dfd2465..80b93b6c4f 100644 --- a/proxy/src/redis/notifications.rs +++ b/proxy/src/redis/notifications.rs @@ -12,6 +12,7 @@ use uuid::Uuid; use super::connection_with_credentials_provider::ConnectionWithCredentialsProvider; use crate::cache::project_info::ProjectInfoCache; use crate::cancellation::{CancelMap, CancellationHandler}; +use crate::config::ProxyConfig; use crate::intern::{ProjectIdInt, RoleNameInt}; use crate::metrics::{Metrics, RedisErrors, RedisEventsCount}; @@ -249,6 +250,7 @@ async fn handle_messages( /// Handle console's invalidation messages. #[tracing::instrument(name = "redis_notifications", skip_all)] pub async fn task_main( + config: &'static ProxyConfig, redis: ConnectionWithCredentialsProvider, cache: Arc, cancel_map: CancelMap, @@ -258,6 +260,7 @@ where C: ProjectInfoCache + Send + Sync + 'static, { let cancellation_handler = Arc::new(CancellationHandler::<()>::new( + &config.connect_to_compute, cancel_map, crate::metrics::CancellationSource::FromRedis, )); diff --git a/proxy/src/scram/exchange.rs b/proxy/src/scram/exchange.rs index 6a13f645a5..77853db3db 100644 --- a/proxy/src/scram/exchange.rs +++ b/proxy/src/scram/exchange.rs @@ -13,7 +13,6 @@ use super::secret::ServerSecret; use super::signature::SignatureBuilder; use super::threadpool::ThreadPool; use super::ScramKey; -use crate::config; use crate::intern::EndpointIdInt; use crate::sasl::{self, ChannelBinding, Error as SaslError}; @@ -59,14 +58,14 @@ enum ExchangeState { pub(crate) struct Exchange<'a> { state: ExchangeState, secret: &'a ServerSecret, - tls_server_end_point: config::TlsServerEndPoint, + tls_server_end_point: crate::tls::TlsServerEndPoint, } impl<'a> Exchange<'a> { pub(crate) fn new( secret: &'a ServerSecret, nonce: fn() -> [u8; SCRAM_RAW_NONCE_LEN], - tls_server_end_point: config::TlsServerEndPoint, + tls_server_end_point: crate::tls::TlsServerEndPoint, ) -> Self { Self { state: ExchangeState::Initial(SaslInitial { nonce }), @@ -120,7 +119,7 @@ impl SaslInitial { fn transition( &self, secret: &ServerSecret, - tls_server_end_point: &config::TlsServerEndPoint, + tls_server_end_point: &crate::tls::TlsServerEndPoint, input: &str, ) -> sasl::Result> { let client_first_message = ClientFirstMessage::parse(input) @@ -155,7 +154,7 @@ impl SaslSentInner { fn transition( &self, secret: &ServerSecret, - tls_server_end_point: &config::TlsServerEndPoint, + 
tls_server_end_point: &crate::tls::TlsServerEndPoint, input: &str, ) -> sasl::Result> { let Self { @@ -168,8 +167,8 @@ impl SaslSentInner { .ok_or(SaslError::BadClientMessage("invalid client-final-message"))?; let channel_binding = cbind_flag.encode(|_| match tls_server_end_point { - config::TlsServerEndPoint::Sha256(x) => Ok(x), - config::TlsServerEndPoint::Undefined => Err(SaslError::MissingBinding), + crate::tls::TlsServerEndPoint::Sha256(x) => Ok(x), + crate::tls::TlsServerEndPoint::Undefined => Err(SaslError::MissingBinding), })?; // This might've been caused by a MITM attack diff --git a/proxy/src/scram/mod.rs b/proxy/src/scram/mod.rs index b49a9f32ee..cfa571cbe1 100644 --- a/proxy/src/scram/mod.rs +++ b/proxy/src/scram/mod.rs @@ -77,11 +77,8 @@ mod tests { const NONCE: [u8; 18] = [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, ]; - let mut exchange = Exchange::new( - &secret, - || NONCE, - crate::config::TlsServerEndPoint::Undefined, - ); + let mut exchange = + Exchange::new(&secret, || NONCE, crate::tls::TlsServerEndPoint::Undefined); let client_first = "n,,n=user,r=rOprNGfwEbeRWgbNEkqO"; let client_final = "c=biws,r=rOprNGfwEbeRWgbNEkqOAQIDBAUGBwgJCgsMDQ4PEBES,p=rw1r5Kph5ThxmaUBC2GAQ6MfXbPnNkFiTIvdb/Rear0="; diff --git a/proxy/src/serverless/backend.rs b/proxy/src/serverless/backend.rs index 449d50b6e7..b398c3ddd0 100644 --- a/proxy/src/serverless/backend.rs +++ b/proxy/src/serverless/backend.rs @@ -22,7 +22,7 @@ use crate::compute; use crate::compute_ctl::{ ComputeCtlError, ExtensionInstallRequest, Privilege, SetRoleGrantsRequest, }; -use crate::config::ProxyConfig; +use crate::config::{ComputeConfig, ProxyConfig}; use crate::context::RequestContext; use crate::control_plane::client::ApiLockError; use crate::control_plane::errors::{GetAuthInfoError, WakeComputeError}; @@ -196,7 +196,7 @@ impl PoolingBackend { }, &backend, self.config.wake_compute_retry_config, - self.config.connect_to_compute_retry_config, + &self.config.connect_to_compute, ) .await } @@ -237,7 +237,7 @@ impl PoolingBackend { }, &backend, self.config.wake_compute_retry_config, - self.config.connect_to_compute_retry_config, + &self.config.connect_to_compute, ) .await } @@ -502,7 +502,7 @@ impl ConnectMechanism for TokioMechanism { &self, ctx: &RequestContext, node_info: &CachedNodeInfo, - timeout: Duration, + compute_config: &ComputeConfig, ) -> Result { let host = node_info.config.get_host(); let permit = self.locks.get_permit(&host).await?; @@ -511,7 +511,7 @@ impl ConnectMechanism for TokioMechanism { let config = config .user(&self.conn_info.user_info.user) .dbname(&self.conn_info.dbname) - .connect_timeout(timeout); + .connect_timeout(compute_config.timeout); let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute); let res = config.connect(postgres_client::NoTls).await; @@ -552,7 +552,7 @@ impl ConnectMechanism for HyperMechanism { &self, ctx: &RequestContext, node_info: &CachedNodeInfo, - timeout: Duration, + config: &ComputeConfig, ) -> Result { let host = node_info.config.get_host(); let permit = self.locks.get_permit(&host).await?; @@ -560,7 +560,7 @@ impl ConnectMechanism for HyperMechanism { let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute); let port = node_info.config.get_port(); - let res = connect_http2(&host, port, timeout).await; + let res = connect_http2(&host, port, config.timeout).await; drop(pause); let (client, connection) = permit.release_result(res)?; diff --git a/proxy/src/serverless/websocket.rs b/proxy/src/serverless/websocket.rs index 
812fedaf04..47326c1181 100644 --- a/proxy/src/serverless/websocket.rs +++ b/proxy/src/serverless/websocket.rs @@ -168,7 +168,7 @@ pub(crate) async fn serve_websocket( Ok(Some(p)) => { ctx.set_success(); ctx.log_connect(); - match p.proxy_pass().await { + match p.proxy_pass(&config.connect_to_compute).await { Ok(()) => Ok(()), Err(ErrorSource::Client(err)) => Err(err).context("client"), Err(ErrorSource::Compute(err)) => Err(err).context("compute"), diff --git a/proxy/src/stream.rs b/proxy/src/stream.rs index 11f426819d..ace27a7284 100644 --- a/proxy/src/stream.rs +++ b/proxy/src/stream.rs @@ -11,9 +11,9 @@ use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; use tokio_rustls::server::TlsStream; use tracing::debug; -use crate::config::TlsServerEndPoint; use crate::error::{ErrorKind, ReportableError, UserFacingError}; use crate::metrics::Metrics; +use crate::tls::TlsServerEndPoint; /// Stream wrapper which implements libpq's protocol. /// diff --git a/proxy/src/tls/client_config.rs b/proxy/src/tls/client_config.rs new file mode 100644 index 0000000000..a2d695aae1 --- /dev/null +++ b/proxy/src/tls/client_config.rs @@ -0,0 +1,42 @@ +use std::sync::Arc; + +use anyhow::bail; +use rustls::crypto::ring; + +pub(crate) fn load_certs() -> anyhow::Result> { + let der_certs = rustls_native_certs::load_native_certs(); + + if !der_certs.errors.is_empty() { + bail!("could not parse certificates: {:?}", der_certs.errors); + } + + let mut store = rustls::RootCertStore::empty(); + store.add_parsable_certificates(der_certs.certs); + Ok(Arc::new(store)) +} + +/// Loads the root certificates and constructs a client config suitable for connecting to the neon compute. +/// This function is blocking. +pub fn compute_client_config_with_root_certs() -> anyhow::Result { + Ok( + rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider())) + .with_safe_default_protocol_versions() + .expect("ring should support the default protocol versions") + .with_root_certificates(load_certs()?) + .with_no_client_auth(), + ) +} + +#[cfg(test)] +pub fn compute_client_config_with_certs( + certs: impl IntoIterator>, +) -> rustls::ClientConfig { + let mut store = rustls::RootCertStore::empty(); + store.add_parsable_certificates(certs); + + rustls::ClientConfig::builder_with_provider(Arc::new(ring::default_provider())) + .with_safe_default_protocol_versions() + .expect("ring should support the default protocol versions") + .with_root_certificates(store) + .with_no_client_auth() +} diff --git a/proxy/src/tls/mod.rs b/proxy/src/tls/mod.rs new file mode 100644 index 0000000000..d6ce6bd9fc --- /dev/null +++ b/proxy/src/tls/mod.rs @@ -0,0 +1,72 @@ +pub mod client_config; +pub mod postgres_rustls; +pub mod server_config; + +use anyhow::Context; +use rustls::pki_types::CertificateDer; +use sha2::{Digest, Sha256}; +use tracing::{error, info}; +use x509_parser::oid_registry; + +/// +pub const PG_ALPN_PROTOCOL: &[u8] = b"postgresql"; + +/// Channel binding parameter +/// +/// +/// Description: The hash of the TLS server's certificate as it +/// appears, octet for octet, in the server's Certificate message. Note +/// that the Certificate message contains a certificate_list, in which +/// the first element is the server's certificate. 
+/// +/// The hash function is to be selected as follows: +/// +/// * if the certificate's signatureAlgorithm uses a single hash +/// function, and that hash function is either MD5 or SHA-1, then use SHA-256; +/// +/// * if the certificate's signatureAlgorithm uses a single hash +/// function and that hash function neither MD5 nor SHA-1, then use +/// the hash function associated with the certificate's +/// signatureAlgorithm; +/// +/// * if the certificate's signatureAlgorithm uses no hash functions or +/// uses multiple hash functions, then this channel binding type's +/// channel bindings are undefined at this time (updates to is channel +/// binding type may occur to address this issue if it ever arises). +#[derive(Debug, Clone, Copy)] +pub enum TlsServerEndPoint { + Sha256([u8; 32]), + Undefined, +} + +impl TlsServerEndPoint { + pub fn new(cert: &CertificateDer<'_>) -> anyhow::Result { + let sha256_oids = [ + // I'm explicitly not adding MD5 or SHA1 here... They're bad. + oid_registry::OID_SIG_ECDSA_WITH_SHA256, + oid_registry::OID_PKCS1_SHA256WITHRSA, + ]; + + let pem = x509_parser::parse_x509_certificate(cert) + .context("Failed to parse PEM object from cerficiate")? + .1; + + info!(subject = %pem.subject, "parsing TLS certificate"); + + let reg = oid_registry::OidRegistry::default().with_all_crypto(); + let oid = pem.signature_algorithm.oid(); + let alg = reg.get(oid); + if sha256_oids.contains(oid) { + let tls_server_end_point: [u8; 32] = Sha256::new().chain_update(cert).finalize().into(); + info!(subject = %pem.subject, signature_algorithm = alg.map(|a| a.description()), tls_server_end_point = %base64::encode(tls_server_end_point), "determined channel binding"); + Ok(Self::Sha256(tls_server_end_point)) + } else { + error!(subject = %pem.subject, signature_algorithm = alg.map(|a| a.description()), "unknown channel binding"); + Ok(Self::Undefined) + } + } + + pub fn supported(&self) -> bool { + !matches!(self, TlsServerEndPoint::Undefined) + } +} diff --git a/proxy/src/postgres_rustls/mod.rs b/proxy/src/tls/postgres_rustls.rs similarity index 96% rename from proxy/src/postgres_rustls/mod.rs rename to proxy/src/tls/postgres_rustls.rs index 5ef20991c3..0ad279b635 100644 --- a/proxy/src/postgres_rustls/mod.rs +++ b/proxy/src/tls/postgres_rustls.rs @@ -18,7 +18,7 @@ mod private { use tokio_rustls::client::TlsStream; use tokio_rustls::TlsConnector; - use crate::config::TlsServerEndPoint; + use crate::tls::TlsServerEndPoint; pub struct TlsConnectFuture { inner: tokio_rustls::Connect, @@ -126,16 +126,14 @@ mod private { /// That way you can connect to PostgreSQL using `rustls` as the TLS stack. #[derive(Clone)] pub struct MakeRustlsConnect { - config: Arc, + pub config: Arc, } impl MakeRustlsConnect { /// Creates a new `MakeRustlsConnect` from the provided `ClientConfig`. 
#[must_use] - pub fn new(config: ClientConfig) -> Self { - Self { - config: Arc::new(config), - } + pub fn new(config: Arc) -> Self { + Self { config } } } diff --git a/proxy/src/tls/server_config.rs b/proxy/src/tls/server_config.rs new file mode 100644 index 0000000000..2cc1657eea --- /dev/null +++ b/proxy/src/tls/server_config.rs @@ -0,0 +1,218 @@ +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; + +use anyhow::{bail, Context}; +use itertools::Itertools; +use rustls::crypto::ring::{self, sign}; +use rustls::pki_types::{CertificateDer, PrivateKeyDer}; + +use super::{TlsServerEndPoint, PG_ALPN_PROTOCOL}; + +pub struct TlsConfig { + pub config: Arc, + pub common_names: HashSet, + pub cert_resolver: Arc, +} + +impl TlsConfig { + pub fn to_server_config(&self) -> Arc { + self.config.clone() + } +} + +/// Configure TLS for the main endpoint. +pub fn configure_tls( + key_path: &str, + cert_path: &str, + certs_dir: Option<&String>, + allow_tls_keylogfile: bool, +) -> anyhow::Result { + let mut cert_resolver = CertResolver::new(); + + // add default certificate + cert_resolver.add_cert_path(key_path, cert_path, true)?; + + // add extra certificates + if let Some(certs_dir) = certs_dir { + for entry in std::fs::read_dir(certs_dir)? { + let entry = entry?; + let path = entry.path(); + if path.is_dir() { + // file names aligned with default cert-manager names + let key_path = path.join("tls.key"); + let cert_path = path.join("tls.crt"); + if key_path.exists() && cert_path.exists() { + cert_resolver.add_cert_path( + &key_path.to_string_lossy(), + &cert_path.to_string_lossy(), + false, + )?; + } + } + } + } + + let common_names = cert_resolver.get_common_names(); + + let cert_resolver = Arc::new(cert_resolver); + + // allow TLS 1.2 to be compatible with older client libraries + let mut config = + rustls::ServerConfig::builder_with_provider(Arc::new(ring::default_provider())) + .with_protocol_versions(&[&rustls::version::TLS13, &rustls::version::TLS12]) + .context("ring should support TLS1.2 and TLS1.3")? + .with_no_client_auth() + .with_cert_resolver(cert_resolver.clone()); + + config.alpn_protocols = vec![PG_ALPN_PROTOCOL.to_vec()]; + + if allow_tls_keylogfile { + // KeyLogFile will check for the SSLKEYLOGFILE environment variable. + config.key_log = Arc::new(rustls::KeyLogFile::new()); + } + + Ok(TlsConfig { + config: Arc::new(config), + common_names, + cert_resolver, + }) +} + +#[derive(Default, Debug)] +pub struct CertResolver { + certs: HashMap, TlsServerEndPoint)>, + default: Option<(Arc, TlsServerEndPoint)>, +} + +impl CertResolver { + pub fn new() -> Self { + Self::default() + } + + fn add_cert_path( + &mut self, + key_path: &str, + cert_path: &str, + is_default: bool, + ) -> anyhow::Result<()> { + let priv_key = { + let key_bytes = std::fs::read(key_path) + .with_context(|| format!("Failed to read TLS keys at '{key_path}'"))?; + rustls_pemfile::private_key(&mut &key_bytes[..]) + .with_context(|| format!("Failed to parse TLS keys at '{key_path}'"))? + .with_context(|| format!("Failed to parse TLS keys at '{key_path}'"))? + }; + + let cert_chain_bytes = std::fs::read(cert_path) + .context(format!("Failed to read TLS cert file at '{cert_path}.'"))?; + + let cert_chain = { + rustls_pemfile::certs(&mut &cert_chain_bytes[..]) + .try_collect() + .with_context(|| { + format!("Failed to read TLS certificate chain from bytes from file at '{cert_path}'.") + })? 
+ }; + + self.add_cert(priv_key, cert_chain, is_default) + } + + pub fn add_cert( + &mut self, + priv_key: PrivateKeyDer<'static>, + cert_chain: Vec>, + is_default: bool, + ) -> anyhow::Result<()> { + let key = sign::any_supported_type(&priv_key).context("invalid private key")?; + + let first_cert = &cert_chain[0]; + let tls_server_end_point = TlsServerEndPoint::new(first_cert)?; + let pem = x509_parser::parse_x509_certificate(first_cert) + .context("Failed to parse PEM object from cerficiate")? + .1; + + let common_name = pem.subject().to_string(); + + // We need to get the canonical name for this certificate so we can match them against any domain names + // seen within the proxy codebase. + // + // In scram-proxy we use wildcard certificates only, with the database endpoint as the wildcard subdomain, taken from SNI. + // We need to remove the wildcard prefix for the purposes of certificate selection. + // + // auth-broker does not use SNI and instead uses the Neon-Connection-String header. + // Auth broker has the subdomain `apiauth` we need to remove for the purposes of validating the Neon-Connection-String. + // + // Console Redirect proxy does not use any wildcard domains and does not need any certificate selection or conn string + // validation, so let's we can continue with any common-name + let common_name = if let Some(s) = common_name.strip_prefix("CN=*.") { + s.to_string() + } else if let Some(s) = common_name.strip_prefix("CN=apiauth.") { + s.to_string() + } else if let Some(s) = common_name.strip_prefix("CN=") { + s.to_string() + } else { + bail!("Failed to parse common name from certificate") + }; + + let cert = Arc::new(rustls::sign::CertifiedKey::new(cert_chain, key)); + + if is_default { + self.default = Some((cert.clone(), tls_server_end_point)); + } + + self.certs.insert(common_name, (cert, tls_server_end_point)); + + Ok(()) + } + + pub fn get_common_names(&self) -> HashSet { + self.certs.keys().map(|s| s.to_string()).collect() + } +} + +impl rustls::server::ResolvesServerCert for CertResolver { + fn resolve( + &self, + client_hello: rustls::server::ClientHello<'_>, + ) -> Option> { + self.resolve(client_hello.server_name()).map(|x| x.0) + } +} + +impl CertResolver { + pub fn resolve( + &self, + server_name: Option<&str>, + ) -> Option<(Arc, TlsServerEndPoint)> { + // loop here and cut off more and more subdomains until we find + // a match to get a proper wildcard support. OTOH, we now do not + // use nested domains, so keep this simple for now. + // + // With the current coding foo.com will match *.foo.com and that + // repeats behavior of the old code. + if let Some(mut sni_name) = server_name { + loop { + if let Some(cert) = self.certs.get(sni_name) { + return Some(cert.clone()); + } + if let Some((_, rest)) = sni_name.split_once('.') { + sni_name = rest; + } else { + return None; + } + } + } else { + // No SNI, use the default certificate, otherwise we can't get to + // options parameter which can be used to set endpoint name too. + // That means that non-SNI flow will not work for CNAME domains in + // verify-full mode. + // + // If that will be a problem we can: + // + // a) Instead of multi-cert approach use single cert with extra + // domains listed in Subject Alternative Name (SAN). + // b) Deploy separate proxy instances for extra domains. 
+ self.default.clone() + } + } +} From b3cd883f93e63d9dab6f3ac8e6e8e3f697c14c29 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Thu, 2 Jan 2025 14:28:15 +0300 Subject: [PATCH 58/89] Unlock LFC mutex when LFC cache is disabled (#10235) ## Problem See https://github.com/neondatabase/neon/issues/10233 `lfc_containsv` returns with holding lock when LFC was disabled. This bug was introduced in commit 78938d1b59 ## Summary of changes Release lock before return. Co-authored-by: Konstantin Knizhnik --- pgxn/neon/file_cache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/pgxn/neon/file_cache.c b/pgxn/neon/file_cache.c index f49415be68..ad5667cbab 100644 --- a/pgxn/neon/file_cache.c +++ b/pgxn/neon/file_cache.c @@ -541,6 +541,7 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, } else { + LWLockRelease(lfc_lock); return found; } From 26600f2973b9d9495889a347cdd21f2c3b7c99e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= Date: Thu, 2 Jan 2025 12:33:42 +0100 Subject: [PATCH 59/89] Skip running clippy without default features (#10098) ## Problem Running clippy with `cargo hack --feature-powerset` in CI isn't particularly fast. This PR follows-up on https://github.com/neondatabase/neon/pull/8912 to improve the speed of our clippy runs. Parallelism as suggested in https://github.com/neondatabase/neon/issues/9901 was tested, but didn't show consistent enough improvements to be worth it. It actually increased the amount of work done, as there's less cache hits when clippy runs are spread out over multiple target directories. Additionally, parallelism makes it so caching needs to be thought about more actively and copying around target directories to enable parallelism eats the rest of the performance gains from parallel execution. After some discussion, the decision was to instead cut down on the number of jobs that are running further. The easiest way to do this is to not run clippy *without* default features. The list of default features is empty for all crates, and I haven't found anything using `cfg(feature = "default")` either, so this is likely not going to change anything except speeding the runs up. ## Summary of changes Reduce the amount of feature combinations tried by `cargo hack` (as suggested in https://github.com/neondatabase/neon/pull/8912#pullrequestreview-2286482368) by never disabling default features. ## Alternatives - We can split things out into different jobs which reduces the time until everything is finished by running more things in parallel. This does however decreases the amount of cache hits and increases the amount of time spent on overhead tasks like repo cloning and restoring caches by doing those multiple times instead of once. - We could replace `cargo hack [...] clippy` with `cargo clippy [...]; cargo clippy --features testing`. I'm not 100% sure how this compares to the change here in the PR, but it does seem to run a bit faster. That likely means it's doing less work, but without understanding what exactly we loose by that I'd rather not do that for now. I'd appreciate input on this though. 
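To make the feature-flag reasoning concrete: `cargo hack --feature-powerset` only changes which `#[cfg(feature = ...)]` items clippy gets to see. A hypothetical sketch (the item names are illustrative; only the `testing` and `default` feature names come from the discussion above):

```rust
// Linted only in powerset combinations that enable `testing`; those
// combinations are still generated after this change.
#[cfg(feature = "testing")]
pub fn only_with_testing_feature() {}

// The only code that always-on default features could hide from clippy is
// code gated on the literal `default` feature, which, per the text above,
// nothing in the workspace uses.
#[cfg(feature = "default")]
pub fn only_with_default_feature() {}
```

Since the `default` feature list is empty for all crates and nothing is gated on it, adding `--features default --ignore-unknown-features` prunes the powerset without changing the set of cfg-gated code that clippy checks.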
--- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 55c4bf08b9..12b1ac98ac 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -212,7 +212,7 @@ jobs: fi echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV - name: Run cargo clippy (debug) - run: cargo hack --feature-powerset clippy $CLIPPY_COMMON_ARGS + run: cargo hack --features default --ignore-unknown-features --feature-powerset clippy $CLIPPY_COMMON_ARGS - name: Check documentation generation run: cargo doc --workspace --no-deps --document-private-items From ee22d4c9ef3d8dc59360e7675b0c36b5e5fd7be8 Mon Sep 17 00:00:00 2001 From: Folke Behrens Date: Thu, 2 Jan 2025 14:32:24 +0100 Subject: [PATCH 60/89] proxy: Set TCP_NODELAY for compute connections (#10240) neondatabase/cloud#19184 --- proxy/src/compute.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index d60dfd0f80..89de6692ad 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -193,11 +193,15 @@ impl ConnCfg { let connect_once = |host, port| { debug!("trying to connect to compute node at {host}:{port}"); - connect_with_timeout(host, port).and_then(|socket| async { - let socket_addr = socket.peer_addr()?; + connect_with_timeout(host, port).and_then(|stream| async { + let socket_addr = stream.peer_addr()?; + let socket = socket2::SockRef::from(&stream); + // Disable Nagle's algorithm to not introduce latency between + // client and compute. + socket.set_nodelay(true)?; // This prevents load balancer from severing the connection. - socket2::SockRef::from(&socket).set_keepalive(true)?; - Ok((socket_addr, socket)) + socket.set_keepalive(true)?; + Ok((socket_addr, stream)) }) }; From 8c7dcd259846646858b230a05fd3923361d8a717 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Thu, 2 Jan 2025 17:14:18 +0300 Subject: [PATCH 61/89] Set heartbeat interval for chaos test (#10222) ## Problem See https://neondb.slack.com/archives/C033RQ5SPDH/p1734707873215729 test_timeline_archival_chaos becomes more flaky with increased heartbeat interval Resolves #10250. 
## Summary of changes Override heatbeat interval for `test_timelirn_archival_chaos.py` --------- Co-authored-by: Konstantin Knizhnik --- test_runner/regress/test_timeline_archive.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test_runner/regress/test_timeline_archive.py b/test_runner/regress/test_timeline_archive.py index 87579f9e92..9b3a48add9 100644 --- a/test_runner/regress/test_timeline_archive.py +++ b/test_runner/regress/test_timeline_archive.py @@ -398,6 +398,7 @@ def test_timeline_archival_chaos(neon_env_builder: NeonEnvBuilder): # Offloading is off by default at time of writing: remove this line when it's on by default neon_env_builder.pageserver_config_override = "timeline_offloading = true" + neon_env_builder.storage_controller_config = {"heartbeat_interval": "100msec"} neon_env_builder.enable_pageserver_remote_storage(s3_storage()) # We will exercise migrations, so need multiple pageservers From 1622fd8bda096d1eefb81bf4970ee54542be4595 Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Thu, 2 Jan 2025 17:02:48 +0100 Subject: [PATCH 62/89] proxy: recognize but ignore the 3 new redis message types (#10197) ## Problem https://neondb.slack.com/archives/C085MBDUSS2/p1734604792755369 ## Summary of changes Recognize and ignore the 3 new broadcast messages: - `/block_public_or_vpc_access_updated` - `/allowed_vpc_endpoints_updated_for_org` - `/allowed_vpc_endpoints_updated_for_projects` --- proxy/src/redis/notifications.rs | 56 +++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/proxy/src/redis/notifications.rs b/proxy/src/redis/notifications.rs index 80b93b6c4f..671305a300 100644 --- a/proxy/src/redis/notifications.rs +++ b/proxy/src/redis/notifications.rs @@ -40,6 +40,27 @@ pub(crate) enum Notification { AllowedIpsUpdate { allowed_ips_update: AllowedIpsUpdate, }, + #[serde( + rename = "/block_public_or_vpc_access_updated", + deserialize_with = "deserialize_json_string" + )] + BlockPublicOrVpcAccessUpdated { + block_public_or_vpc_access_updated: BlockPublicOrVpcAccessUpdated, + }, + #[serde( + rename = "/allowed_vpc_endpoints_updated_for_org", + deserialize_with = "deserialize_json_string" + )] + AllowedVpcEndpointsUpdatedForOrg { + allowed_vpc_endpoints_updated_for_org: AllowedVpcEndpointsUpdatedForOrg, + }, + #[serde( + rename = "/allowed_vpc_endpoints_updated_for_projects", + deserialize_with = "deserialize_json_string" + )] + AllowedVpcEndpointsUpdatedForProjects { + allowed_vpc_endpoints_updated_for_projects: AllowedVpcEndpointsUpdatedForProjects, + }, #[serde( rename = "/password_updated", deserialize_with = "deserialize_json_string" @@ -52,6 +73,24 @@ pub(crate) enum Notification { pub(crate) struct AllowedIpsUpdate { project_id: ProjectIdInt, } + +#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)] +pub(crate) struct BlockPublicOrVpcAccessUpdated { + project_id: ProjectIdInt, +} + +#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)] +pub(crate) struct AllowedVpcEndpointsUpdatedForOrg { + // TODO: change type once the implementation is more fully fledged. + // See e.g. https://github.com/neondatabase/neon/pull/10073. + account_id: ProjectIdInt, +} + +#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)] +pub(crate) struct AllowedVpcEndpointsUpdatedForProjects { + project_ids: Vec, +} + #[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)] pub(crate) struct PasswordUpdate { project_id: ProjectIdInt, @@ -165,7 +204,11 @@ impl MessageHandler { } } } - Notification::AllowedIpsUpdate { .. 
} | Notification::PasswordUpdate { .. } => { + Notification::AllowedIpsUpdate { .. } + | Notification::PasswordUpdate { .. } + | Notification::BlockPublicOrVpcAccessUpdated { .. } + | Notification::AllowedVpcEndpointsUpdatedForOrg { .. } + | Notification::AllowedVpcEndpointsUpdatedForProjects { .. } => { invalidate_cache(self.cache.clone(), msg.clone()); if matches!(msg, Notification::AllowedIpsUpdate { .. }) { Metrics::get() @@ -178,6 +221,8 @@ impl MessageHandler { .redis_events_count .inc(RedisEventsCount::PasswordUpdate); } + // TODO: add additional metrics for the other event types. + // It might happen that the invalid entry is on the way to be cached. // To make sure that the entry is invalidated, let's repeat the invalidation in INVALIDATION_LAG seconds. // TODO: include the version (or the timestamp) in the message and invalidate only if the entry is cached before the message. @@ -204,6 +249,15 @@ fn invalidate_cache(cache: Arc, msg: Notification) { password_update.role_name, ), Notification::Cancel(_) => unreachable!("cancel message should be handled separately"), + Notification::BlockPublicOrVpcAccessUpdated { .. } => { + // https://github.com/neondatabase/neon/pull/10073 + } + Notification::AllowedVpcEndpointsUpdatedForOrg { .. } => { + // https://github.com/neondatabase/neon/pull/10073 + } + Notification::AllowedVpcEndpointsUpdatedForProjects { .. } => { + // https://github.com/neondatabase/neon/pull/10073 + } } } From 56e6ebfe172ce0c7fb0faf9a3e64e8e2b3902b37 Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Thu, 2 Jan 2025 16:05:14 +0000 Subject: [PATCH 63/89] chore: building compute_tools and local_proxy together (#10257) ## Problem Building local_proxy and compute_tools features the same dependency tree, but as they are currently built in separate clean layers all that progress is wasted. For our arm builds that's an extra 10 minutes. ## Summary of changes Combines the compute_tools and local_proxy build layers. --- compute/compute-node.Dockerfile | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/compute/compute-node.Dockerfile b/compute/compute-node.Dockerfile index 5e7b4e8287..06aaf9e7f4 100644 --- a/compute/compute-node.Dockerfile +++ b/compute/compute-node.Dockerfile @@ -1285,7 +1285,7 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \ ######################################################################################### # -# Compile and run the Neon-specific `compute_ctl` and `fast_import` binaries +# Compile the Neon-specific `compute_ctl`, `fast_import`, and `local_proxy` binaries # ######################################################################################### FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools @@ -1295,7 +1295,7 @@ ENV BUILD_TAG=$BUILD_TAG USER nonroot # Copy entire project to get Cargo.* files with proper dependencies for the whole project COPY --chown=nonroot . . 
-RUN cd compute_tools && mold -run cargo build --locked --profile release-line-debug-size-lto +RUN mold -run cargo build --locked --profile release-line-debug-size-lto --bin compute_ctl --bin fast_import --bin local_proxy ######################################################################################### # @@ -1338,20 +1338,6 @@ RUN set -e \ && make -j $(nproc) dist_man_MANS= \ && make install dist_man_MANS= -######################################################################################### -# -# Compile the Neon-specific `local_proxy` binary -# -######################################################################################### -FROM $REPOSITORY/$IMAGE:$TAG AS local_proxy -ARG BUILD_TAG -ENV BUILD_TAG=$BUILD_TAG - -USER nonroot -# Copy entire project to get Cargo.* files with proper dependencies for the whole project -COPY --chown=nonroot . . -RUN mold -run cargo build --locked --profile release-line-debug-size-lto --bin local_proxy - ######################################################################################### # # Layers "postgres-exporter" and "sql-exporter" @@ -1491,7 +1477,7 @@ COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/ COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini # local_proxy and its config -COPY --from=local_proxy --chown=postgres /home/nonroot/target/release-line-debug-size-lto/local_proxy /usr/local/bin/local_proxy +COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/local_proxy /usr/local/bin/local_proxy RUN mkdir -p /etc/local_proxy && chown postgres:postgres /etc/local_proxy # Metrics exporter binaries and configuration files From 363ea97f691cbf9026309c08239300a0d351018d Mon Sep 17 00:00:00 2001 From: Tristan Partin Date: Thu, 2 Jan 2025 12:37:50 -0600 Subject: [PATCH 64/89] Add more substantial tests for compute migrations (#9811) The previous tests really didn't do much. This set should be quite a bit more encompassing. 
Signed-off-by: Tristan Partin --- Cargo.lock | 1 + compute_tools/Cargo.toml | 3 +- compute_tools/src/bin/compute_ctl.rs | 5 ++ compute_tools/src/compute.rs | 15 +++- compute_tools/src/http/api.rs | 15 ++++ compute_tools/src/migration.rs | 41 ++++++++- .../tests/0001-neon_superuser_bypass_rls.sql | 9 ++ .../src/migrations/tests/0002-alter_roles.sql | 25 ++++++ ..._create_subscription_to_neon_superuser.sql | 10 +++ ...004-grant_pg_monitor_to_neon_superuser.sql | 19 ++++ ...-grant_all_on_tables_to_neon_superuser.sql | 2 + ...ant_all_on_sequences_to_neon_superuser.sql | 2 + ...es_to_neon_superuser_with_grant_option.sql | 2 + ...es_to_neon_superuser_with_grant_option.sql | 2 + ...plication_for_previously_allowed_roles.sql | 2 + ...ynchronization_funcs_to_neon_superuser.sql | 13 +++ ...cation_origin_status_to_neon_superuser.sql | 13 +++ libs/utils/src/failpoint_support.rs | 4 +- test_runner/conftest.py | 1 + test_runner/fixtures/compute_migrations.py | 34 +++++++ test_runner/fixtures/endpoint/http.py | 14 +++ test_runner/fixtures/neon_cli.py | 5 +- test_runner/fixtures/neon_fixtures.py | 12 ++- test_runner/fixtures/paths.py | 2 +- .../regress/test_compute_migrations.py | 90 +++++++++++++++++++ test_runner/regress/test_migrations.py | 33 ------- 26 files changed, 327 insertions(+), 47 deletions(-) create mode 100644 compute_tools/src/migrations/tests/0001-neon_superuser_bypass_rls.sql create mode 100644 compute_tools/src/migrations/tests/0002-alter_roles.sql create mode 100644 compute_tools/src/migrations/tests/0003-grant_pg_create_subscription_to_neon_superuser.sql create mode 100644 compute_tools/src/migrations/tests/0004-grant_pg_monitor_to_neon_superuser.sql create mode 100644 compute_tools/src/migrations/tests/0005-grant_all_on_tables_to_neon_superuser.sql create mode 100644 compute_tools/src/migrations/tests/0006-grant_all_on_sequences_to_neon_superuser.sql create mode 100644 compute_tools/src/migrations/tests/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql create mode 100644 compute_tools/src/migrations/tests/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql create mode 100644 compute_tools/src/migrations/tests/0009-revoke_replication_for_previously_allowed_roles.sql create mode 100644 compute_tools/src/migrations/tests/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql create mode 100644 compute_tools/src/migrations/tests/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql create mode 100644 test_runner/fixtures/compute_migrations.py create mode 100644 test_runner/regress/test_compute_migrations.py delete mode 100644 test_runner/regress/test_migrations.py diff --git a/Cargo.lock b/Cargo.lock index d9ac167042..420def152d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1274,6 +1274,7 @@ dependencies = [ "chrono", "clap", "compute_api", + "fail", "flate2", "futures", "hyper 0.14.30", diff --git a/compute_tools/Cargo.toml b/compute_tools/Cargo.toml index c0c390caef..9525b27818 100644 --- a/compute_tools/Cargo.toml +++ b/compute_tools/Cargo.toml @@ -7,7 +7,7 @@ license.workspace = true [features] default = [] # Enables test specific features. 
-testing = [] +testing = ["fail/failpoints"] [dependencies] base64.workspace = true @@ -19,6 +19,7 @@ camino.workspace = true chrono.workspace = true cfg-if.workspace = true clap.workspace = true +fail.workspace = true flate2.workspace = true futures.workspace = true hyper0 = { workspace = true, features = ["full"] } diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index bb248734a8..95ade9a87d 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -67,12 +67,15 @@ use compute_tools::params::*; use compute_tools::spec::*; use compute_tools::swap::resize_swap; use rlimit::{setrlimit, Resource}; +use utils::failpoint_support; // this is an arbitrary build tag. Fine as a default / for testing purposes // in-case of not-set environment var const BUILD_TAG_DEFAULT: &str = "latest"; fn main() -> Result<()> { + let scenario = failpoint_support::init(); + let (build_tag, clap_args) = init()?; // enable core dumping for all child processes @@ -100,6 +103,8 @@ fn main() -> Result<()> { maybe_delay_exit(delay_exit); + scenario.teardown(); + deinit_and_exit(wait_pg_result); } diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index d72a04f2f9..78f6033429 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -1181,8 +1181,19 @@ impl ComputeNode { let mut conf = postgres::config::Config::from(conf); conf.application_name("compute_ctl:migrations"); - let mut client = conf.connect(NoTls)?; - handle_migrations(&mut client).context("apply_config handle_migrations") + match conf.connect(NoTls) { + Ok(mut client) => { + if let Err(e) = handle_migrations(&mut client) { + error!("Failed to run migrations: {}", e); + } + } + Err(e) => { + error!( + "Failed to connect to the compute for running migrations: {}", + e + ); + } + }; }); Ok::<(), anyhow::Error>(()) diff --git a/compute_tools/src/http/api.rs b/compute_tools/src/http/api.rs index 7fa6426d8f..a4b1a63e6d 100644 --- a/compute_tools/src/http/api.rs +++ b/compute_tools/src/http/api.rs @@ -24,8 +24,11 @@ use metrics::proto::MetricFamily; use metrics::Encoder; use metrics::TextEncoder; use tokio::task; +use tokio_util::sync::CancellationToken; use tracing::{debug, error, info, warn}; use tracing_utils::http::OtelName; +use utils::failpoint_support::failpoints_handler; +use utils::http::error::ApiError; use utils::http::request::must_get_query_param; fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse { @@ -310,6 +313,18 @@ async fn routes(req: Request, compute: &Arc) -> Response { + match failpoints_handler(req, CancellationToken::new()).await { + Ok(r) => r, + Err(ApiError::BadRequest(e)) => { + render_json_error(&e.to_string(), StatusCode::BAD_REQUEST) + } + Err(_) => { + render_json_error("Internal server error", StatusCode::INTERNAL_SERVER_ERROR) + } + } + } + // download extension files from remote extension storage on demand (&Method::POST, route) if route.starts_with("/extension_server/") => { info!("serving {:?} POST request", route); diff --git a/compute_tools/src/migration.rs b/compute_tools/src/migration.rs index 22ab145eda..07d738abe9 100644 --- a/compute_tools/src/migration.rs +++ b/compute_tools/src/migration.rs @@ -1,13 +1,16 @@ use anyhow::{Context, Result}; +use fail::fail_point; use postgres::Client; use tracing::info; +/// Runs a series of migrations on a target database pub(crate) struct MigrationRunner<'m> { client: &'m mut Client, migrations: &'m [&'m str], } impl<'m> MigrationRunner<'m> 
{ + /// Create a new migration runner pub fn new(client: &'m mut Client, migrations: &'m [&'m str]) -> Self { // The neon_migration.migration_id::id column is a bigint, which is equivalent to an i64 assert!(migrations.len() + 1 < i64::MAX as usize); @@ -15,6 +18,7 @@ impl<'m> MigrationRunner<'m> { Self { client, migrations } } + /// Get the current value neon_migration.migration_id fn get_migration_id(&mut self) -> Result { let query = "SELECT id FROM neon_migration.migration_id"; let row = self @@ -25,9 +29,34 @@ impl<'m> MigrationRunner<'m> { Ok(row.get::<&str, i64>("id")) } + /// Update the neon_migration.migration_id value + /// + /// This function has a fail point called compute-migration, which can be + /// used if you would like to fail the application of a series of migrations + /// at some point. fn update_migration_id(&mut self, migration_id: i64) -> Result<()> { let setval = format!("UPDATE neon_migration.migration_id SET id={}", migration_id); + // We use this fail point in order to check that failing in the + // middle of applying a series of migrations fails in an expected + // manner + if cfg!(feature = "testing") { + let fail = (|| { + fail_point!("compute-migration", |fail_migration_id| { + migration_id == fail_migration_id.unwrap().parse::().unwrap() + }); + + false + })(); + + if fail { + return Err(anyhow::anyhow!(format!( + "migration {} was configured to fail because of a failpoint", + migration_id + ))); + } + } + self.client .simple_query(&setval) .context("run_migrations update id")?; @@ -35,7 +64,8 @@ impl<'m> MigrationRunner<'m> { Ok(()) } - fn prepare_migrations(&mut self) -> Result<()> { + /// Prepare the migrations the target database for handling migrations + fn prepare_database(&mut self) -> Result<()> { let query = "CREATE SCHEMA IF NOT EXISTS neon_migration"; self.client.simple_query(query)?; @@ -54,8 +84,9 @@ impl<'m> MigrationRunner<'m> { Ok(()) } + /// Run the configrured set of migrations pub fn run_migrations(mut self) -> Result<()> { - self.prepare_migrations()?; + self.prepare_database()?; let mut current_migration = self.get_migration_id()? as usize; while current_migration < self.migrations.len() { @@ -69,6 +100,11 @@ impl<'m> MigrationRunner<'m> { if migration.starts_with("-- SKIP") { info!("Skipping migration id={}", migration_id!(current_migration)); + + // Even though we are skipping the migration, updating the + // migration ID should help keep logic easy to understand when + // trying to understand the state of a cluster. 
+ self.update_migration_id(migration_id!(current_migration))?; } else { info!( "Running migration id={}:\n{}\n", @@ -87,7 +123,6 @@ impl<'m> MigrationRunner<'m> { ) })?; - // Migration IDs start at 1 self.update_migration_id(migration_id!(current_migration))?; self.client diff --git a/compute_tools/src/migrations/tests/0001-neon_superuser_bypass_rls.sql b/compute_tools/src/migrations/tests/0001-neon_superuser_bypass_rls.sql new file mode 100644 index 0000000000..0c81cef1c4 --- /dev/null +++ b/compute_tools/src/migrations/tests/0001-neon_superuser_bypass_rls.sql @@ -0,0 +1,9 @@ +DO $$ +DECLARE + bypassrls boolean; +BEGIN + SELECT rolbypassrls INTO bypassrls FROM pg_roles WHERE rolname = 'neon_superuser'; + IF NOT bypassrls THEN + RAISE EXCEPTION 'neon_superuser cannot bypass RLS'; + END IF; +END $$; diff --git a/compute_tools/src/migrations/tests/0002-alter_roles.sql b/compute_tools/src/migrations/tests/0002-alter_roles.sql new file mode 100644 index 0000000000..433f7b34f7 --- /dev/null +++ b/compute_tools/src/migrations/tests/0002-alter_roles.sql @@ -0,0 +1,25 @@ +DO $$ +DECLARE + role record; +BEGIN + FOR role IN + SELECT rolname AS name, rolinherit AS inherit + FROM pg_roles + WHERE pg_has_role(rolname, 'neon_superuser', 'member') + LOOP + IF NOT role.inherit THEN + RAISE EXCEPTION '% cannot inherit', quote_ident(role.name); + END IF; + END LOOP; + + FOR role IN + SELECT rolname AS name, rolbypassrls AS bypassrls + FROM pg_roles + WHERE NOT pg_has_role(rolname, 'neon_superuser', 'member') + AND NOT starts_with(rolname, 'pg_') + LOOP + IF role.bypassrls THEN + RAISE EXCEPTION '% can bypass RLS', quote_ident(role.name); + END IF; + END LOOP; +END $$; diff --git a/compute_tools/src/migrations/tests/0003-grant_pg_create_subscription_to_neon_superuser.sql b/compute_tools/src/migrations/tests/0003-grant_pg_create_subscription_to_neon_superuser.sql new file mode 100644 index 0000000000..b164d61295 --- /dev/null +++ b/compute_tools/src/migrations/tests/0003-grant_pg_create_subscription_to_neon_superuser.sql @@ -0,0 +1,10 @@ +DO $$ +BEGIN + IF (SELECT current_setting('server_version_num')::numeric < 160000) THEN + RETURN; + END IF; + + IF NOT (SELECT pg_has_role('neon_superuser', 'pg_create_subscription', 'member')) THEN + RAISE EXCEPTION 'neon_superuser cannot execute pg_create_subscription'; + END IF; +END $$; diff --git a/compute_tools/src/migrations/tests/0004-grant_pg_monitor_to_neon_superuser.sql b/compute_tools/src/migrations/tests/0004-grant_pg_monitor_to_neon_superuser.sql new file mode 100644 index 0000000000..acb8dd417d --- /dev/null +++ b/compute_tools/src/migrations/tests/0004-grant_pg_monitor_to_neon_superuser.sql @@ -0,0 +1,19 @@ +DO $$ +DECLARE + monitor record; +BEGIN + SELECT pg_has_role('neon_superuser', 'pg_monitor', 'member') AS member, + admin_option AS admin + INTO monitor + FROM pg_auth_members + WHERE roleid = 'pg_monitor'::regrole + AND member = 'pg_monitor'::regrole; + + IF NOT monitor.member THEN + RAISE EXCEPTION 'neon_superuser is not a member of pg_monitor'; + END IF; + + IF NOT monitor.admin THEN + RAISE EXCEPTION 'neon_superuser cannot grant pg_monitor'; + END IF; +END $$; diff --git a/compute_tools/src/migrations/tests/0005-grant_all_on_tables_to_neon_superuser.sql b/compute_tools/src/migrations/tests/0005-grant_all_on_tables_to_neon_superuser.sql new file mode 100644 index 0000000000..f99101bd65 --- /dev/null +++ b/compute_tools/src/migrations/tests/0005-grant_all_on_tables_to_neon_superuser.sql @@ -0,0 +1,2 @@ +-- This test was never written becuase at the time 
migration tests were added +-- the accompanying migration was already skipped. diff --git a/compute_tools/src/migrations/tests/0006-grant_all_on_sequences_to_neon_superuser.sql b/compute_tools/src/migrations/tests/0006-grant_all_on_sequences_to_neon_superuser.sql new file mode 100644 index 0000000000..f99101bd65 --- /dev/null +++ b/compute_tools/src/migrations/tests/0006-grant_all_on_sequences_to_neon_superuser.sql @@ -0,0 +1,2 @@ +-- This test was never written becuase at the time migration tests were added +-- the accompanying migration was already skipped. diff --git a/compute_tools/src/migrations/tests/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql b/compute_tools/src/migrations/tests/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql new file mode 100644 index 0000000000..f99101bd65 --- /dev/null +++ b/compute_tools/src/migrations/tests/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql @@ -0,0 +1,2 @@ +-- This test was never written becuase at the time migration tests were added +-- the accompanying migration was already skipped. diff --git a/compute_tools/src/migrations/tests/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql b/compute_tools/src/migrations/tests/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql new file mode 100644 index 0000000000..f99101bd65 --- /dev/null +++ b/compute_tools/src/migrations/tests/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql @@ -0,0 +1,2 @@ +-- This test was never written becuase at the time migration tests were added +-- the accompanying migration was already skipped. diff --git a/compute_tools/src/migrations/tests/0009-revoke_replication_for_previously_allowed_roles.sql b/compute_tools/src/migrations/tests/0009-revoke_replication_for_previously_allowed_roles.sql new file mode 100644 index 0000000000..f99101bd65 --- /dev/null +++ b/compute_tools/src/migrations/tests/0009-revoke_replication_for_previously_allowed_roles.sql @@ -0,0 +1,2 @@ +-- This test was never written becuase at the time migration tests were added +-- the accompanying migration was already skipped. 
diff --git a/compute_tools/src/migrations/tests/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql b/compute_tools/src/migrations/tests/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql new file mode 100644 index 0000000000..af7f50e95d --- /dev/null +++ b/compute_tools/src/migrations/tests/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql @@ -0,0 +1,13 @@ +DO $$ +DECLARE + can_execute boolean; +BEGIN + SELECT bool_and(has_function_privilege('neon_superuser', oid, 'execute')) + INTO can_execute + FROM pg_proc + WHERE proname IN ('pg_export_snapshot', 'pg_log_standby_snapshot') + AND pronamespace = 'pg_catalog'::regnamespace; + IF NOT can_execute THEN + RAISE EXCEPTION 'neon_superuser cannot execute both pg_export_snapshot and pg_log_standby_snapshot'; + END IF; +END $$; diff --git a/compute_tools/src/migrations/tests/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql b/compute_tools/src/migrations/tests/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql new file mode 100644 index 0000000000..e55dcdc3b6 --- /dev/null +++ b/compute_tools/src/migrations/tests/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql @@ -0,0 +1,13 @@ +DO $$ +DECLARE + can_execute boolean; +BEGIN + SELECT has_function_privilege('neon_superuser', oid, 'execute') + INTO can_execute + FROM pg_proc + WHERE proname = 'pg_show_replication_origin_status' + AND pronamespace = 'pg_catalog'::regnamespace; + IF NOT can_execute THEN + RAISE EXCEPTION 'neon_superuser cannot execute pg_show_replication_origin_status'; + END IF; +END $$; diff --git a/libs/utils/src/failpoint_support.rs b/libs/utils/src/failpoint_support.rs index 870684b399..701ba2d42c 100644 --- a/libs/utils/src/failpoint_support.rs +++ b/libs/utils/src/failpoint_support.rs @@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize}; use tokio_util::sync::CancellationToken; use tracing::*; -/// Declare a failpoint that can use the `pause` failpoint action. +/// Declare a failpoint that can use to `pause` failpoint action. /// We don't want to block the executor thread, hence, spawn_blocking + await. #[macro_export] macro_rules! 
pausable_failpoint { @@ -181,7 +181,7 @@ pub async fn failpoints_handler( ) -> Result, ApiError> { if !fail::has_failpoints() { return Err(ApiError::BadRequest(anyhow::anyhow!( - "Cannot manage failpoints because storage was compiled without failpoints support" + "Cannot manage failpoints because neon was compiled without failpoints support" ))); } diff --git a/test_runner/conftest.py b/test_runner/conftest.py index 887bfef478..9e32469d69 100644 --- a/test_runner/conftest.py +++ b/test_runner/conftest.py @@ -8,6 +8,7 @@ pytest_plugins = ( "fixtures.compute_reconfigure", "fixtures.storage_controller_proxy", "fixtures.paths", + "fixtures.compute_migrations", "fixtures.neon_fixtures", "fixtures.benchmark_fixture", "fixtures.pg_stats", diff --git a/test_runner/fixtures/compute_migrations.py b/test_runner/fixtures/compute_migrations.py new file mode 100644 index 0000000000..ea99785af0 --- /dev/null +++ b/test_runner/fixtures/compute_migrations.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +import os +from typing import TYPE_CHECKING + +import pytest + +from fixtures.paths import BASE_DIR + +if TYPE_CHECKING: + from collections.abc import Iterator + from pathlib import Path + +COMPUTE_MIGRATIONS_DIR = BASE_DIR / "compute_tools" / "src" / "migrations" +COMPUTE_MIGRATIONS_TEST_DIR = COMPUTE_MIGRATIONS_DIR / "tests" + +COMPUTE_MIGRATIONS = sorted(next(os.walk(COMPUTE_MIGRATIONS_DIR))[2]) +NUM_COMPUTE_MIGRATIONS = len(COMPUTE_MIGRATIONS) + + +@pytest.fixture(scope="session") +def compute_migrations_dir() -> Iterator[Path]: + """ + Retrieve the path to the compute migrations directory. + """ + yield COMPUTE_MIGRATIONS_DIR + + +@pytest.fixture(scope="session") +def compute_migrations_test_dir() -> Iterator[Path]: + """ + Retrieve the path to the compute migrations test directory. 
+ """ + yield COMPUTE_MIGRATIONS_TEST_DIR diff --git a/test_runner/fixtures/endpoint/http.py b/test_runner/fixtures/endpoint/http.py index 1cd9158c68..aa0d95fe80 100644 --- a/test_runner/fixtures/endpoint/http.py +++ b/test_runner/fixtures/endpoint/http.py @@ -55,3 +55,17 @@ class EndpointHttpClient(requests.Session): res = self.get(f"http://localhost:{self.port}/metrics") res.raise_for_status() return res.text + + def configure_failpoints(self, *args: tuple[str, str]) -> None: + body: list[dict[str, str]] = [] + + for fp in args: + body.append( + { + "name": fp[0], + "action": fp[1], + } + ) + + res = self.post(f"http://localhost:{self.port}/failpoints", json=body) + res.raise_for_status() diff --git a/test_runner/fixtures/neon_cli.py b/test_runner/fixtures/neon_cli.py index a85a191455..adbd6414a7 100644 --- a/test_runner/fixtures/neon_cli.py +++ b/test_runner/fixtures/neon_cli.py @@ -522,14 +522,15 @@ class NeonLocalCli(AbstractNeonCli): safekeepers: list[int] | None = None, remote_ext_config: str | None = None, pageserver_id: int | None = None, - allow_multiple=False, + allow_multiple: bool = False, basebackup_request_tries: int | None = None, + env: dict[str, str] | None = None, ) -> subprocess.CompletedProcess[str]: args = [ "endpoint", "start", ] - extra_env_vars = {} + extra_env_vars = env or {} if basebackup_request_tries is not None: extra_env_vars["NEON_COMPUTE_TESTING_BASEBACKUP_TRIES"] = str(basebackup_request_tries) if remote_ext_config is not None: diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 9f78ad120b..a0c642163d 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -54,6 +54,7 @@ from fixtures.common_types import ( TimelineArchivalState, TimelineId, ) +from fixtures.compute_migrations import NUM_COMPUTE_MIGRATIONS from fixtures.endpoint.http import EndpointHttpClient from fixtures.h2server import H2Server from fixtures.log_helper import log @@ -3855,6 +3856,7 @@ class Endpoint(PgProtocol, LogUtils): safekeepers: list[int] | None = None, allow_multiple: bool = False, basebackup_request_tries: int | None = None, + env: dict[str, str] | None = None, ) -> Self: """ Start the Postgres instance. @@ -3875,6 +3877,7 @@ class Endpoint(PgProtocol, LogUtils): pageserver_id=pageserver_id, allow_multiple=allow_multiple, basebackup_request_tries=basebackup_request_tries, + env=env, ) self._running.release(1) self.log_config_value("shared_buffers") @@ -3988,14 +3991,17 @@ class Endpoint(PgProtocol, LogUtils): log.info("Updating compute spec to: %s", json.dumps(data_dict, indent=4)) json.dump(data_dict, file, indent=4) - # Please note: Migrations only run if pg_skip_catalog_updates is false - def wait_for_migrations(self, num_migrations: int = 11): + def wait_for_migrations(self, wait_for: int = NUM_COMPUTE_MIGRATIONS) -> None: + """ + Wait for all compute migrations to be ran. Remember that migrations only + run if "pg_skip_catalog_updates" is set in the compute spec to false. 
+ """ with self.cursor() as cur: def check_migrations_done(): cur.execute("SELECT id FROM neon_migration.migration_id") migration_id: int = cur.fetchall()[0][0] - assert migration_id >= num_migrations + assert migration_id >= wait_for wait_until(check_migrations_done) diff --git a/test_runner/fixtures/paths.py b/test_runner/fixtures/paths.py index 80777d65e9..fc4fb3629b 100644 --- a/test_runner/fixtures/paths.py +++ b/test_runner/fixtures/paths.py @@ -21,8 +21,8 @@ if TYPE_CHECKING: BASE_DIR = Path(__file__).parents[2] -COMPUTE_CONFIG_DIR = BASE_DIR / "compute" / "etc" DEFAULT_OUTPUT_DIR: str = "test_output" +COMPUTE_CONFIG_DIR = BASE_DIR / "compute" / "etc" def get_test_dir(request: FixtureRequest, top_output_dir: Path, prefix: str | None = None) -> Path: diff --git a/test_runner/regress/test_compute_migrations.py b/test_runner/regress/test_compute_migrations.py new file mode 100644 index 0000000000..803702a6f8 --- /dev/null +++ b/test_runner/regress/test_compute_migrations.py @@ -0,0 +1,90 @@ +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING, cast + +import pytest +from fixtures.compute_migrations import COMPUTE_MIGRATIONS, NUM_COMPUTE_MIGRATIONS + +if TYPE_CHECKING: + from fixtures.neon_fixtures import NeonEnv + + +def test_compute_migrations_retry(neon_simple_env: NeonEnv, compute_migrations_dir: Path): + """ + Test that compute_ctl can recover from migration failures next time it + starts, and that the persisted migration ID is correct in such cases. + """ + env = neon_simple_env + + endpoint = env.endpoints.create("main") + endpoint.respec(skip_pg_catalog_updates=False) + + for i in range(1, NUM_COMPUTE_MIGRATIONS + 1): + endpoint.start(env={"FAILPOINTS": f"compute-migration=return({i})"}) + + # Make sure that the migrations ran + endpoint.wait_for_migrations(wait_for=i - 1) + + # Confirm that we correctly recorded that in the + # neon_migration.migration_id table + with endpoint.cursor() as cur: + cur.execute("SELECT id FROM neon_migration.migration_id") + migration_id = cast("int", cur.fetchall()[0][0]) + assert migration_id == i - 1 + + endpoint.stop() + + endpoint.start() + + # Now wait for the rest of the migrations + endpoint.wait_for_migrations() + + with endpoint.cursor() as cur: + cur.execute("SELECT id FROM neon_migration.migration_id") + migration_id = cast("int", cur.fetchall()[0][0]) + assert migration_id == NUM_COMPUTE_MIGRATIONS + + for i, m in enumerate(COMPUTE_MIGRATIONS, start=1): + migration_query = (compute_migrations_dir / m).read_text(encoding="utf-8") + if not migration_query.startswith("-- SKIP"): + pattern = rf"Skipping migration id={i}" + else: + pattern = rf"Running migration id={i}" + + endpoint.log_contains(pattern) + + +@pytest.mark.parametrize( + "migration", + (pytest.param((i, m), id=str(i)) for i, m in enumerate(COMPUTE_MIGRATIONS, start=1)), +) +def test_compute_migrations_e2e( + neon_simple_env: NeonEnv, + compute_migrations_dir: Path, + compute_migrations_test_dir: Path, + migration: tuple[int, str], +): + """ + Test that the migrations perform as advertised. 
+ """ + env = neon_simple_env + + migration_id = migration[0] + migration_filename = migration[1] + + migration_query = (compute_migrations_dir / migration_filename).read_text(encoding="utf-8") + if migration_query.startswith("-- SKIP"): + pytest.skip("The migration is marked as SKIP") + + endpoint = env.endpoints.create("main") + endpoint.respec(skip_pg_catalog_updates=False) + + # Stop applying migrations after the one we want to test, so that we can + # test the state of the cluster at the given migration ID + endpoint.start(env={"FAILPOINTS": f"compute-migration=return({migration_id + 1})"}) + + endpoint.wait_for_migrations(wait_for=migration_id) + + check_query = (compute_migrations_test_dir / migration_filename).read_text(encoding="utf-8") + endpoint.safe_psql(check_query) diff --git a/test_runner/regress/test_migrations.py b/test_runner/regress/test_migrations.py deleted file mode 100644 index 7211619a99..0000000000 --- a/test_runner/regress/test_migrations.py +++ /dev/null @@ -1,33 +0,0 @@ -from __future__ import annotations - -import time -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from fixtures.neon_fixtures import NeonEnv - - -def test_migrations(neon_simple_env: NeonEnv): - env = neon_simple_env - - endpoint = env.endpoints.create("main") - endpoint.respec(skip_pg_catalog_updates=False) - endpoint.start() - - num_migrations = 11 - endpoint.wait_for_migrations(num_migrations=num_migrations) - - with endpoint.cursor() as cur: - cur.execute("SELECT id FROM neon_migration.migration_id") - migration_id = cur.fetchall() - assert migration_id[0][0] == num_migrations - - endpoint.stop() - endpoint.start() - # We don't have a good way of knowing that the migrations code path finished executing - # in compute_ctl in the case that no migrations are being run - time.sleep(1) - with endpoint.cursor() as cur: - cur.execute("SELECT id FROM neon_migration.migration_id") - migration_id = cur.fetchall() - assert migration_id[0][0] == num_migrations From cd10c719f9c7ec4424a6c30a6419256589c60d2e Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Thu, 2 Jan 2025 11:45:59 -0800 Subject: [PATCH 65/89] compute: Add spec support for disabling LFC resizing (#10132) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ref neondatabase/cloud#21731 ## Problem When we manually override the LFC size for particular computes, autoscaling will typically undo that because vm-monitor will resize LFC itself. So, we'd like a way to make vm-monitor not set LFC size — this actually already exists, if we just don't give vm-monitor a postgres connection string. ## Summary of changes Add a new field to the compute spec, `disable_lfc_resizing`. When set to `true`, we pass in `None` for its postgres connection string. That matches the configuration tested in `neondatabase/autoscaling` CI. --- compute_tools/src/bin/compute_ctl.rs | 19 +++++++++++++++---- control_plane/src/endpoint.rs | 1 + libs/compute_api/src/spec.rs | 9 +++++++++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index 95ade9a87d..26ae25ec20 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -424,9 +424,13 @@ fn start_postgres( "running compute with features: {:?}", state.pspec.as_ref().unwrap().spec.features ); - // before we release the mutex, fetch the swap size (if any) for later. 
- let swap_size_bytes = state.pspec.as_ref().unwrap().spec.swap_size_bytes; - let disk_quota_bytes = state.pspec.as_ref().unwrap().spec.disk_quota_bytes; + // before we release the mutex, fetch some parameters for later. + let &ComputeSpec { + swap_size_bytes, + disk_quota_bytes, + disable_lfc_resizing, + .. + } = &state.pspec.as_ref().unwrap().spec; drop(state); // Launch remaining service threads @@ -531,11 +535,18 @@ fn start_postgres( // This token is used internally by the monitor to clean up all threads let token = CancellationToken::new(); + // don't pass postgres connection string to vm-monitor if we don't want it to resize LFC + let pgconnstr = if disable_lfc_resizing.unwrap_or(false) { + None + } else { + file_cache_connstr.cloned() + }; + let vm_monitor = rt.as_ref().map(|rt| { rt.spawn(vm_monitor::start( Box::leak(Box::new(vm_monitor::Args { cgroup: cgroup.cloned(), - pgconnstr: file_cache_connstr.cloned(), + pgconnstr, addr: vm_monitor_addr.clone(), })), token.clone(), diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs index 5ebf842813..5e47ec4811 100644 --- a/control_plane/src/endpoint.rs +++ b/control_plane/src/endpoint.rs @@ -585,6 +585,7 @@ impl Endpoint { features: self.features.clone(), swap_size_bytes: None, disk_quota_bytes: None, + disable_lfc_resizing: None, cluster: Cluster { cluster_id: None, // project ID: not used name: None, // project name: not used diff --git a/libs/compute_api/src/spec.rs b/libs/compute_api/src/spec.rs index 6d9c353cda..54d6a1d38f 100644 --- a/libs/compute_api/src/spec.rs +++ b/libs/compute_api/src/spec.rs @@ -67,6 +67,15 @@ pub struct ComputeSpec { #[serde(default)] pub disk_quota_bytes: Option, + /// Disables the vm-monitor behavior that resizes LFC on upscale/downscale, instead relying on + /// the initial size of LFC. + /// + /// This is intended for use when the LFC size is being overridden from the default but + /// autoscaling is still enabled, and we don't want the vm-monitor to interfere with the custom + /// LFC sizing. + #[serde(default)] + pub disable_lfc_resizing: Option, + /// Expected cluster state at the end of transition process. pub cluster: Cluster, pub delta_operations: Option>, From eefad27538757a760532ccf4ae2da0088dafb47a Mon Sep 17 00:00:00 2001 From: Tristan Partin Date: Thu, 2 Jan 2025 16:12:56 -0600 Subject: [PATCH 66/89] Inline various migration queries (#10231) There was no value in saving them off to temporary variables. Signed-off-by: Tristan Partin Signed-off-by: Tristan Partin --- compute_tools/src/migration.rs | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/compute_tools/src/migration.rs b/compute_tools/src/migration.rs index 07d738abe9..1f3de65806 100644 --- a/compute_tools/src/migration.rs +++ b/compute_tools/src/migration.rs @@ -35,8 +35,6 @@ impl<'m> MigrationRunner<'m> { /// used if you would like to fail the application of a series of migrations /// at some point. 
fn update_migration_id(&mut self, migration_id: i64) -> Result<()> { - let setval = format!("UPDATE neon_migration.migration_id SET id={}", migration_id); - // We use this fail point in order to check that failing in the // middle of applying a series of migrations fails in an expected // manner @@ -58,7 +56,10 @@ impl<'m> MigrationRunner<'m> { } self.client - .simple_query(&setval) + .query( + "UPDATE neon_migration.migration_id SET id = $1", + &[&migration_id], + ) .context("run_migrations update id")?; Ok(()) @@ -66,20 +67,16 @@ impl<'m> MigrationRunner<'m> { /// Prepare the migrations the target database for handling migrations fn prepare_database(&mut self) -> Result<()> { - let query = "CREATE SCHEMA IF NOT EXISTS neon_migration"; - self.client.simple_query(query)?; - - let query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)"; - self.client.simple_query(query)?; - - let query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING"; - self.client.simple_query(query)?; - - let query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin"; - self.client.simple_query(query)?; - - let query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC"; - self.client.simple_query(query)?; + self.client + .simple_query("CREATE SCHEMA IF NOT EXISTS neon_migration")?; + self.client.simple_query("CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)")?; + self.client.simple_query( + "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING", + )?; + self.client + .simple_query("ALTER SCHEMA neon_migration OWNER TO cloud_admin")?; + self.client + .simple_query("REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC")?; Ok(()) } From 7a598b9842f47707d9d93dda621094494221c5a6 Mon Sep 17 00:00:00 2001 From: Ivan Efremov Date: Fri, 3 Jan 2025 12:04:58 +0200 Subject: [PATCH 67/89] [proxy/docs]imprv: Add local testing section to proxy README (#10230) Add commands to run proxy locally with the mocked control plane --- proxy/README.md | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/proxy/README.md b/proxy/README.md index 8d850737be..4b98342d72 100644 --- a/proxy/README.md +++ b/proxy/README.md @@ -102,23 +102,39 @@ User can pass several optional headers that will affect resulting json. 2. `Neon-Array-Mode: true`. Return postgres rows as arrays instead of objects. That is more compact representation and also helps in some edge cases where it is hard to use rows represented as objects (e.g. when several fields have the same name). +## Test proxy locally -## Using SNI-based routing on localhost - -Now proxy determines project name from the subdomain, request to the `round-rice-566201.somedomain.tld` will be routed to the project named `round-rice-566201`. Unfortunately, `/etc/hosts` does not support domain wildcards, so I usually use `*.localtest.me` which resolves to `127.0.0.1`. Now we can create self-signed certificate and play with proxy: +Proxy determines project name from the subdomain, request to the `round-rice-566201.somedomain.tld` will be routed to the project named `round-rice-566201`. Unfortunately, `/etc/hosts` does not support domain wildcards, so we can use *.localtest.me` which resolves to `127.0.0.1`. 
+Let's create a self-signed certificate by running:
 ```sh
 openssl req -new -x509 -days 365 -nodes -text -out server.crt -keyout server.key -subj "/CN=*.localtest.me"
 ```
-start proxy
-
+Then we need to build the proxy with the 'testing' feature and run it, e.g.:
 ```sh
-./target/debug/proxy -c server.crt -k server.key
+RUST_LOG=proxy cargo run -p proxy --bin proxy --features testing -- --auth-backend postgres --auth-endpoint 'postgresql://proxy:password@endpoint.localtest.me:5432/postgres' --is-private-access-proxy true -c server.crt -k server.key
 ```
-and connect to it
+We will also need to have a postgres instance. Assuming that we have set up Docker, we can start one as follows:
+```sh
+docker run \
+  --detach \
+  --name proxy-postgres \
+  --env POSTGRES_PASSWORD=proxy-postgres \
+  --publish 5432:5432 \
+  postgres:17-bookworm
+```
+
+The next step is setting up the auth table and schema, as well as creating the role (without the JWT table):
+```sh
+docker exec -it proxy-postgres psql -U postgres -c "CREATE SCHEMA IF NOT EXISTS neon_control_plane"
+docker exec -it proxy-postgres psql -U postgres -c "CREATE TABLE neon_control_plane.endpoints (endpoint_id VARCHAR(255) PRIMARY KEY, allowed_ips VARCHAR(255))"
+docker exec -it proxy-postgres psql -U postgres -c "CREATE ROLE proxy WITH SUPERUSER LOGIN PASSWORD 'password';"
+```
+
+Now, from the client, you can start a new session:
 ```sh
-PGSSLROOTCERT=./server.crt psql 'postgres://my-cluster-42.localtest.me:1234?sslmode=verify-full'
-```
+PGSSLROOTCERT=./server.crt psql "postgresql://proxy:password@endpoint.localtest.me:4432/postgres?sslmode=verify-full"
+```
\ No newline at end of file

From 2d4f267983858c197de795074046a2b1376a8616 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Fri, 3 Jan 2025 10:20:18 +0000
Subject: [PATCH 68/89] cargo: update diesel, pq-sys (#10256)

## Problem
Versions of `diesel` and `pq-sys` were somewhat stale. I was checking on libpq->openssl versions while investigating a segfault via https://github.com/neondatabase/cloud/issues/21010.
I don't think these rust bindings are likely to be the source of issues, but we might as well freshen them as a precaution.
## Summary of changes - Update diesel to 2.2.6 - Update pq-sys to 0.6.3 --- Cargo.lock | 8 ++++---- storage_controller/Cargo.toml | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 420def152d..9e0e343996 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1733,9 +1733,9 @@ checksum = "ab03c107fafeb3ee9f5925686dbb7a73bc76e3932abb0d2b365cb64b169cf04c" [[package]] name = "diesel" -version = "2.2.3" +version = "2.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65e13bab2796f412722112327f3e575601a3e9cdcbe426f0d30dbf43f3f5dc71" +checksum = "ccf1bedf64cdb9643204a36dd15b19a6ce8e7aa7f7b105868e9f1fad5ffa7d12" dependencies = [ "bitflags 2.4.1", "byteorder", @@ -4494,9 +4494,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "pq-sys" -version = "0.4.8" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31c0052426df997c0cbd30789eb44ca097e3541717a7b8fa36b1c464ee7edebd" +checksum = "f6cc05d7ea95200187117196eee9edd0644424911821aeb28a18ce60ea0b8793" dependencies = [ "vcpkg", ] diff --git a/storage_controller/Cargo.toml b/storage_controller/Cargo.toml index 2f5d266567..5f3319512d 100644 --- a/storage_controller/Cargo.toml +++ b/storage_controller/Cargo.toml @@ -43,13 +43,13 @@ scopeguard.workspace = true strum.workspace = true strum_macros.workspace = true -diesel = { version = "2.1.4", features = [ +diesel = { version = "2.2.6", features = [ "serde_json", "postgres", "r2d2", "chrono", ] } -diesel_migrations = { version = "2.1.0" } +diesel_migrations = { version = "2.2.0" } r2d2 = { version = "0.8.10" } utils = { path = "../libs/utils/" } From ba9722a2fd913d30b639ec506c42761cdca44440 Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 3 Jan 2025 10:55:07 +0000 Subject: [PATCH 69/89] tests: add upload wait in test_scrubber_physical_gc_ancestors (#10260) ## Problem We see periodic failures in `test_scrubber_physical_gc_ancestors`, where the logs show that the pageserver is creating image layers that should cause child shards to no longer reference their parents' layers, but then the scrubber runs and doesn't find any unreferenced layers.[ https://neon-github-public-dev.s3.amazonaws.com/reports/pr-10256/12582034135/index.html#/testresult/78ea06dea6ba8dd3 From inspecting the code & test, it seems like this could be as simple as the test failing to wait for uploads before running the scrubber. It had a 2 second delay built in to satisfy the scrubbers time threshold checks, which on a lightly loaded machine would also have been easily enough for uploads to complete, but our test machines are more heavily loaded all the time. ## Summary of changes - Wait for uploads to complete after generating images layers in test_scrubber_physical_gc_ancestors, so that the scrubber should reliably see the post-compaction metadata. 
--- test_runner/regress/test_storage_scrubber.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test_runner/regress/test_storage_scrubber.py b/test_runner/regress/test_storage_scrubber.py index 198e4f0460..220c428531 100644 --- a/test_runner/regress/test_storage_scrubber.py +++ b/test_runner/regress/test_storage_scrubber.py @@ -266,7 +266,9 @@ def test_scrubber_physical_gc_ancestors(neon_env_builder: NeonEnvBuilder, shard_ for shard in shards: ps = env.get_tenant_pageserver(shard) assert ps is not None - ps.http_client().timeline_compact(shard, timeline_id, force_image_layer_creation=True) + ps.http_client().timeline_compact( + shard, timeline_id, force_image_layer_creation=True, wait_until_uploaded=True + ) ps.http_client().timeline_gc(shard, timeline_id, 0) # We will use a min_age_secs=1 threshold for deletion, let it pass From c08759f367063fde2558f979b83f6f9209741c7a Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 3 Jan 2025 10:55:15 +0000 Subject: [PATCH 70/89] storcon: verbose logs in rare case of shards not attached yet (#10262) ## Problem When we do a timeline CRUD operation, we check that the shards we need to mutate are currently attached to a pageserver, by reading `generation` and `generation_pageserver` from the database. If any don't appear to be attached, we respond with a a 503 and "One or more shards in tenant is not yet attached". This is happening more often than expected, and it's not obvious with current logging what's going on: specifically which shard has a problem, and exactly what we're seeing in these persistent generation columns. (Aside: it's possible that we broke something with the change in #10011 which clears generation_pageserver when we detach a shard, although if so the mechanism isn't trivial: what should happen is that if we stamp on generation_pageserver if a reconciler is running, then it shouldn't matter because we're about to ## Summary of changes - When we are in Attached mode but find that generation_pageserver/generation are unset, output details while looping over shards. --- storage_controller/src/service.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index c0c5bc371a..222cb9fdd4 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -3572,6 +3572,11 @@ impl Service { .iter() .any(|i| i.generation.is_none() || i.generation_pageserver.is_none()) { + let shard_generations = generations + .into_iter() + .map(|i| (i.tenant_shard_id, (i.generation, i.generation_pageserver))) + .collect::>(); + // One or more shards has not been attached to a pageserver. Check if this is because it's configured // to be detached (409: caller should give up), or because it's meant to be attached but isn't yet (503: caller should retry) let locked = self.inner.read().unwrap(); @@ -3582,6 +3587,28 @@ impl Service { PlacementPolicy::Attached(_) => { // This shard is meant to be attached: the caller is not wrong to try and // use this function, but we can't service the request right now. + let Some(generation) = shard_generations.get(shard_id) else { + // This can only happen if there is a split brain controller modifying the database. This should + // never happen when testing, and if it happens in production we can only log the issue. + debug_assert!(false); + tracing::error!("Shard {shard_id} not found in generation state! 
Is another rogue controller running?");
+                        continue;
+                    };
+                    let (generation, generation_pageserver) = generation;
+                    if let Some(generation) = generation {
+                        if generation_pageserver.is_none() {
+                            // This is legitimate only in a very narrow window where the shard was only just configured into
+                            // Attached mode after being created in Secondary or Detached mode, and it has had its generation
+                            // set but not yet had a Reconciler run (reconciler is the only thing that sets generation_pageserver).
+                            tracing::warn!("Shard {shard_id} generation is set ({generation:?}) but generation_pageserver is None, reconciler not run yet?");
+                        }
+                    } else {
+                        // This should never happen: a shard with no generation is only permitted when it was created in some state
+                        // other than PlacementPolicy::Attached (and generation is always written to DB before setting Attached in memory)
+                        debug_assert!(false);
+                        tracing::error!("Shard {shard_id} generation is None, but it is in PlacementPolicy::Attached mode!");
+                        continue;
+                    }
                }
                PlacementPolicy::Secondary | PlacementPolicy::Detached => {
                    return Err(ApiError::Conflict(format!(

From 1303cd5d05062c95660ec00f8846b4258fc62b4c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Arpad=20M=C3=BCller?=
Date: Fri, 3 Jan 2025 13:36:01 +0100
Subject: [PATCH 71/89] Fix defusing race between Tenant::shutdown and offload_timeline (#10150)

There is a race condition between `Tenant::shutdown`'s `defuse_for_drop` loop and `offload_timeline`, where timeline offloading can insert into a tenant that is in the process of shutting down, in fact so far progressed that the `defuse_for_drop` has already been called.
This prevents warn log lines of the form:
```
offloaded timeline was dropped without having cleaned it up at the ancestor
```
The solution piggybacks on the `offloaded_timelines` lock: both the defuse loop and the offloaded timeline insertion need to acquire the lock, and we know that the defuse loop only runs after the tenant has set its `TenantState` to `Stopping`.
So if we hold the `offloaded_timelines` lock, and know that the `TenantState` is not `Stopping`, then we know that the defuse loop has not run yet, and holding the lock ensures that it doesn't start running while we are inserting the offloaded timeline.
Fixes #10070
---
 pageserver/src/tenant/timeline/offload.rs | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/pageserver/src/tenant/timeline/offload.rs b/pageserver/src/tenant/timeline/offload.rs
index 3bfbfb5061..15628a9645 100644
--- a/pageserver/src/tenant/timeline/offload.rs
+++ b/pageserver/src/tenant/timeline/offload.rs
@@ -1,5 +1,7 @@
 use std::sync::Arc;

+use pageserver_api::models::TenantState;
+
 use super::delete::{delete_local_timeline_directory, DeleteTimelineFlow, DeletionGuard};
 use super::Timeline;
 use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
@@ -70,6 +72,15 @@ pub(crate) async fn offload_timeline(
    {
        let mut offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();
+        if matches!(
+            tenant.current_state(),
+            TenantState::Stopping { .. } | TenantState::Broken { .. }
+        ) {
+            // Cancel the operation if the tenant is shutting down.
Do this while the + // timelines_offloaded lock is held to prevent a race with Tenant::shutdown + // for defusing the lock + return Err(OffloadError::Cancelled); + } offloaded_timelines.insert( timeline.timeline_id, Arc::new( From e9d30edc7f7f5fab66f7bdffcca9e8c5bb9b8d27 Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 3 Jan 2025 13:13:22 +0000 Subject: [PATCH 72/89] pageserver: fix a 500 during timeline creation + shutdown (#10259) ## Problem The test_create_churn_during_restart test fails if timeline creation calls return 500 errors (because the API shouldn't do it), and it's sometimes failing, for example: https://neon-github-public-dev.s3.amazonaws.com/reports/pr-10256/12582034135/index.html#/testresult/3ce2e7045465012e ## Summary of changes - Avoid handling UploadQueueShutDownOrStopped case as an Other (i.e. 500) --- pageserver/src/tenant.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 2e4c47c6e4..90017b25f2 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -2604,9 +2604,15 @@ impl Tenant { WaitCompletionError::NotInitialized( e, // If the queue is already stopped, it's a shutdown error. ) if e.is_stopping() => CreateTimelineError::ShuttingDown, - e => CreateTimelineError::Other(e.into()), - }) - .context("wait for timeline initial uploads to complete")?; + WaitCompletionError::NotInitialized(_) => { + // This is a bug: we should never try to wait for uploads before initializing the timeline + debug_assert!(false); + CreateTimelineError::Other(anyhow::anyhow!("timeline not initialized")) + } + WaitCompletionError::UploadQueueShutDownOrStopped => { + CreateTimelineError::ShuttingDown + } + })?; // The creating task is responsible for activating the timeline. // We do this after `wait_completion()` so that we don't spin up tasks that start From b33299dc37d9269fe55bd3256b7a4a72c129b81c Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Fri, 3 Jan 2025 16:21:31 +0100 Subject: [PATCH 73/89] pageserver,safekeeper: disable heap profiling (#10268) ## Problem Since enabling continuous profiling in staging, we've seen frequent seg faults. This is suspected to be because jemalloc and pprof-rs take a stack trace at the same time, and the handlers aren't signal safe. jemalloc does this probabilistically on every allocation, regardless of whether someone is taking a heap profile, which means that any CPU profile has a chance to cause a seg fault. Touches #10225. ## Summary of changes For now, just disable heap profiles -- CPU profiles are more important, and we need to be able to take them without risking a crash. --- pageserver/src/bin/pageserver.rs | 10 ++++++---- safekeeper/src/bin/safekeeper.rs | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index 567a69da3b..b92ff4ebf9 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -53,10 +53,12 @@ project_build_tag!(BUILD_TAG); #[global_allocator] static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; -/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20). -#[allow(non_upper_case_globals)] -#[export_name = "malloc_conf"] -pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0"; +// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20). +// TODO: disabled because concurrent CPU profiles cause seg faults. 
See: +// https://github.com/neondatabase/neon/issues/10225. +//#[allow(non_upper_case_globals)] +//#[export_name = "malloc_conf"] +//pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0"; const PID_FILE_NAME: &str = "pageserver.pid"; diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index 13f6e34575..e0ba38d638 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -51,10 +51,12 @@ use utils::{ #[global_allocator] static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; -/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20). -#[allow(non_upper_case_globals)] -#[export_name = "malloc_conf"] -pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0"; +// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20). +// TODO: disabled because concurrent CPU profiles cause seg faults. See: +// https://github.com/neondatabase/neon/issues/10225. +//#[allow(non_upper_case_globals)] +//#[export_name = "malloc_conf"] +//pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0"; const PID_FILE_NAME: &str = "safekeeper.pid"; const ID_FILE_NAME: &str = "safekeeper.id"; From 1393cc668bce904cf5300f8829addce86437e755 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Fri, 3 Jan 2025 16:38:51 +0100 Subject: [PATCH 74/89] Revert "pageserver: revert flush backpressure (#8550) (#10135)" (#10270) This reverts commit f3ecd5d76ad8b858b2bfaaabba5018046aca46ac. It is [suspected](https://neondb.slack.com/archives/C033RQ5SPDH/p1735907405716759) to have caused significant read amplification in the [ingest benchmark](https://neonprod.grafana.net/d/de3mupf4g68e8e/perf-test3a-ingest-benchmark?orgId=1&from=now-30d&to=now&timezone=utc&var-new_project_endpoint_id=ep-solitary-sun-w22bmut6&var-large_tenant_endpoint_id=ep-holy-bread-w203krzs) (specifically during index creation). We will revisit an intermediate improvement here to unblock [upload parallelism](https://github.com/neondatabase/neon/issues/10096) before properly addressing [compaction backpressure](https://github.com/neondatabase/neon/issues/8390). 
--- pageserver/src/metrics.rs | 25 ++++++++++- pageserver/src/tenant/timeline.rs | 38 +++++++++++++---- test_runner/fixtures/metrics.py | 1 + test_runner/regress/test_branching.py | 13 ++++-- test_runner/regress/test_remote_storage.py | 48 ++++++++++++++++++++++ 5 files changed, 112 insertions(+), 13 deletions(-) diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index bdbabf3f75..b4e20cb8b9 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -3,7 +3,7 @@ use metrics::{ register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec, register_int_counter, register_int_counter_pair_vec, register_int_counter_vec, register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec, - Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair, + Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair, IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec, }; use once_cell::sync::Lazy; @@ -445,6 +445,15 @@ pub(crate) static WAIT_LSN_TIME: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); +static FLUSH_WAIT_UPLOAD_TIME: Lazy = Lazy::new(|| { + register_gauge_vec!( + "pageserver_flush_wait_upload_seconds", + "Time spent waiting for preceding uploads during layer flush", + &["tenant_id", "shard_id", "timeline_id"] + ) + .expect("failed to define a metric") +}); + static LAST_RECORD_LSN: Lazy = Lazy::new(|| { register_int_gauge_vec!( "pageserver_last_record_lsn", @@ -2577,6 +2586,7 @@ pub(crate) struct TimelineMetrics { shard_id: String, timeline_id: String, pub flush_time_histo: StorageTimeMetrics, + pub flush_wait_upload_time_gauge: Gauge, pub compact_time_histo: StorageTimeMetrics, pub create_images_time_histo: StorageTimeMetrics, pub logical_size_histo: StorageTimeMetrics, @@ -2622,6 +2632,9 @@ impl TimelineMetrics { &shard_id, &timeline_id, ); + let flush_wait_upload_time_gauge = FLUSH_WAIT_UPLOAD_TIME + .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) + .unwrap(); let compact_time_histo = StorageTimeMetrics::new( StorageTimeOperation::Compact, &tenant_id, @@ -2767,6 +2780,7 @@ impl TimelineMetrics { shard_id, timeline_id, flush_time_histo, + flush_wait_upload_time_gauge, compact_time_histo, create_images_time_histo, logical_size_histo, @@ -2816,6 +2830,14 @@ impl TimelineMetrics { self.resident_physical_size_gauge.get() } + pub(crate) fn flush_wait_upload_time_gauge_add(&self, duration: f64) { + self.flush_wait_upload_time_gauge.add(duration); + crate::metrics::FLUSH_WAIT_UPLOAD_TIME + .get_metric_with_label_values(&[&self.tenant_id, &self.shard_id, &self.timeline_id]) + .unwrap() + .add(duration); + } + pub(crate) fn shutdown(&self) { let was_shutdown = self .shutdown @@ -2833,6 +2855,7 @@ impl TimelineMetrics { let shard_id = &self.shard_id; let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]); let _ = DISK_CONSISTENT_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]); + let _ = FLUSH_WAIT_UPLOAD_TIME.remove_label_values(&[tenant_id, shard_id, timeline_id]); let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]); { RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get()); diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index e71cb4db80..b36c2f487f 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -144,15 +144,19 @@ use 
self::layer_manager::LayerManager; use self::logical_size::LogicalSize; use self::walreceiver::{WalReceiver, WalReceiverConf}; -use super::config::TenantConf; -use super::remote_timeline_client::index::IndexPart; -use super::remote_timeline_client::RemoteTimelineClient; -use super::secondary::heatmap::{HeatMapLayer, HeatMapTimeline}; -use super::storage_layer::{LayerFringe, LayerVisibilityHint, ReadableLayer}; -use super::upload_queue::NotInitialized; -use super::GcError; use super::{ - debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf, MaybeOffloaded, + config::TenantConf, storage_layer::LayerVisibilityHint, upload_queue::NotInitialized, + MaybeOffloaded, +}; +use super::{debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf}; +use super::{remote_timeline_client::index::IndexPart, storage_layer::LayerFringe}; +use super::{ + remote_timeline_client::RemoteTimelineClient, remote_timeline_client::WaitCompletionError, + storage_layer::ReadableLayer, +}; +use super::{ + secondary::heatmap::{HeatMapLayer, HeatMapTimeline}, + GcError, }; #[cfg(test)] @@ -3836,6 +3840,24 @@ impl Timeline { // release lock on 'layers' }; + // Backpressure mechanism: wait with continuation of the flush loop until we have uploaded all layer files. + // This makes us refuse ingest until the new layers have been persisted to the remote + let start = Instant::now(); + self.remote_client + .wait_completion() + .await + .map_err(|e| match e { + WaitCompletionError::UploadQueueShutDownOrStopped + | WaitCompletionError::NotInitialized( + NotInitialized::ShuttingDown | NotInitialized::Stopped, + ) => FlushLayerError::Cancelled, + WaitCompletionError::NotInitialized(NotInitialized::Uninitialized) => { + FlushLayerError::Other(anyhow!(e).into()) + } + })?; + let duration = start.elapsed().as_secs_f64(); + self.metrics.flush_wait_upload_time_gauge_add(duration); + // FIXME: between create_delta_layer and the scheduling of the upload in `update_metadata_file`, // a compaction can delete the file and then it won't be available for uploads any more. // We still schedule the upload, resulting in an error, but ideally we'd somehow avoid this diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index eb3d06b949..c5295360c3 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -170,6 +170,7 @@ PAGESERVER_PER_TENANT_METRICS: tuple[str, ...] 
= ( "pageserver_evictions_with_low_residence_duration_total", "pageserver_aux_file_estimated_size", "pageserver_valid_lsn_lease_count", + "pageserver_flush_wait_upload_seconds", counter("pageserver_tenant_throttling_count_accounted_start"), counter("pageserver_tenant_throttling_count_accounted_finish"), counter("pageserver_tenant_throttling_wait_usecs_sum"), diff --git a/test_runner/regress/test_branching.py b/test_runner/regress/test_branching.py index a4056404f0..34e4e994cb 100644 --- a/test_runner/regress/test_branching.py +++ b/test_runner/regress/test_branching.py @@ -19,7 +19,6 @@ from fixtures.pageserver.utils import wait_until_tenant_active from fixtures.utils import query_scalar from performance.test_perf_pgbench import get_scales_matrix from requests import RequestException -from requests.exceptions import RetryError # Test branch creation @@ -177,8 +176,11 @@ def test_cannot_create_endpoint_on_non_uploaded_timeline(neon_env_builder: NeonE env.neon_cli.mappings_map_branch(initial_branch, env.initial_tenant, env.initial_timeline) - with pytest.raises(RuntimeError, match="is not active, state: Loading"): - env.endpoints.create_start(initial_branch, tenant_id=env.initial_tenant) + with pytest.raises(RuntimeError, match="ERROR: Not found: Timeline"): + env.endpoints.create_start( + initial_branch, tenant_id=env.initial_tenant, basebackup_request_tries=2 + ) + ps_http.configure_failpoints(("before-upload-index-pausable", "off")) finally: env.pageserver.stop(immediate=True) @@ -219,7 +221,10 @@ def test_cannot_branch_from_non_uploaded_branch(neon_env_builder: NeonEnvBuilder branch_id = TimelineId.generate() - with pytest.raises(RetryError, match="too many 503 error responses"): + with pytest.raises( + PageserverApiException, + match="Cannot branch off the timeline that's not present in pageserver", + ): ps_http.timeline_create( env.pg_version, env.initial_tenant, diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py index 52b6b254aa..76a42ef4a2 100644 --- a/test_runner/regress/test_remote_storage.py +++ b/test_runner/regress/test_remote_storage.py @@ -784,6 +784,54 @@ def test_empty_branch_remote_storage_upload_on_restart(neon_env_builder: NeonEnv create_thread.join() +def test_paused_upload_stalls_checkpoint( + neon_env_builder: NeonEnvBuilder, +): + """ + This test checks that checkpoints block on uploads to remote storage. + """ + neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS) + + env = neon_env_builder.init_start( + initial_tenant_conf={ + # Set a small compaction threshold + "compaction_threshold": "3", + # Disable GC + "gc_period": "0s", + # disable PITR + "pitr_interval": "0s", + } + ) + + env.pageserver.allowed_errors.append( + f".*PUT.* path=/v1/tenant/{env.initial_tenant}/timeline.* request was dropped before completing" + ) + + tenant_id = env.initial_tenant + timeline_id = env.initial_timeline + + client = env.pageserver.http_client() + layers_at_creation = client.layer_map_info(tenant_id, timeline_id) + deltas_at_creation = len(layers_at_creation.delta_layers()) + assert ( + deltas_at_creation == 1 + ), "are you fixing #5863? make sure we end up with 2 deltas at the end of endpoint lifecycle" + + # Make new layer uploads get stuck. + # Note that timeline creation waits for the initial layers to reach remote storage. + # So at this point, the `layers_at_creation` are in remote storage. 
+ client.configure_failpoints(("before-upload-layer-pausable", "pause")) + + with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint: + # Build two tables with some data inside + endpoint.safe_psql("CREATE TABLE foo AS SELECT x FROM generate_series(1, 10000) g(x)") + wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) + + with pytest.raises(ReadTimeout): + client.timeline_checkpoint(tenant_id, timeline_id, timeout=5) + client.configure_failpoints(("before-upload-layer-pausable", "off")) + + def wait_upload_queue_empty( client: PageserverHttpClient, tenant_id: TenantId, timeline_id: TimelineId ): From a77e87a48a628abcac77afeb1f64e5a491275f1c Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Fri, 3 Jan 2025 17:03:19 +0100 Subject: [PATCH 75/89] pageserver: assert that uploads don't modify indexed layers (#10228) ## Problem It's not legal to modify layers that are referenced by the current layer index. Assert this in the upload queue, as preparation for upload queue reordering. Touches #10096. ## Summary of changes Add a debug assertion that the upload queue does not modify layers referenced by the current index. I could be convinced that this should be a plain assertion, but will be conservative for now. --- .../src/tenant/remote_timeline_client.rs | 39 +++++++++++++++++++ .../tenant/remote_timeline_client/index.rs | 21 +++++++--- 2 files changed, 54 insertions(+), 6 deletions(-) diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index fee11bc742..b27ac3e933 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -1943,6 +1943,30 @@ impl RemoteTimelineClient { return; } + // Assert that we don't modify a layer that's referenced by the current index. + if cfg!(debug_assertions) { + let modified = match &task.op { + UploadOp::UploadLayer(layer, layer_metadata, _) => { + vec![(layer.layer_desc().layer_name(), layer_metadata)] + } + UploadOp::Delete(delete) => { + delete.layers.iter().map(|(n, m)| (n.clone(), m)).collect() + } + // These don't modify layers. + UploadOp::UploadMetadata { .. } => Vec::new(), + UploadOp::Barrier(_) => Vec::new(), + UploadOp::Shutdown => Vec::new(), + }; + if let Ok(queue) = self.upload_queue.lock().unwrap().initialized_mut() { + for (ref name, metadata) in modified { + debug_assert!( + !queue.clean.0.references(name, metadata), + "layer {name} modified while referenced by index", + ); + } + } + } + let upload_result: anyhow::Result<()> = match &task.op { UploadOp::UploadLayer(ref layer, ref layer_metadata, mode) => { if let Some(OpType::FlushDeletion) = mode { @@ -2509,6 +2533,21 @@ pub fn remote_layer_path( RemotePath::from_string(&path).expect("Failed to construct path") } +/// Returns true if a and b have the same layer path within a tenant/timeline. This is essentially +/// remote_layer_path(a) == remote_layer_path(b) without the string allocations. +/// +/// TODO: there should be a variant of LayerName for the physical path that contains information +/// about the shard and generation, such that this could be replaced by a simple comparison. +pub fn is_same_remote_layer_path( + aname: &LayerName, + ameta: &LayerFileMetadata, + bname: &LayerName, + bmeta: &LayerFileMetadata, +) -> bool { + // NB: don't assert remote_layer_path(a) == remote_layer_path(b); too expensive even for debug. 
+ aname == bname && ameta.shard == bmeta.shard && ameta.generation == bmeta.generation +} + pub fn remote_initdb_archive_path(tenant_id: &TenantId, timeline_id: &TimelineId) -> RemotePath { RemotePath::from_string(&format!( "tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{INITDB_PATH}" diff --git a/pageserver/src/tenant/remote_timeline_client/index.rs b/pageserver/src/tenant/remote_timeline_client/index.rs index 506990fb2f..51f093cb87 100644 --- a/pageserver/src/tenant/remote_timeline_client/index.rs +++ b/pageserver/src/tenant/remote_timeline_client/index.rs @@ -8,14 +8,14 @@ use std::collections::HashMap; use chrono::NaiveDateTime; use pageserver_api::models::AuxFilePolicy; use serde::{Deserialize, Serialize}; -use utils::id::TimelineId; +use super::is_same_remote_layer_path; use crate::tenant::metadata::TimelineMetadata; use crate::tenant::storage_layer::LayerName; use crate::tenant::timeline::import_pgdata; use crate::tenant::Generation; use pageserver_api::shard::ShardIndex; - +use utils::id::TimelineId; use utils::lsn::Lsn; /// In-memory representation of an `index_part.json` file @@ -45,10 +45,8 @@ pub struct IndexPart { #[serde(skip_serializing_if = "Option::is_none")] pub import_pgdata: Option, - /// Per layer file name metadata, which can be present for a present or missing layer file. - /// - /// Older versions of `IndexPart` will not have this property or have only a part of metadata - /// that latest version stores. + /// Layer filenames and metadata. For an index persisted in remote storage, all layers must + /// exist in remote storage. pub layer_metadata: HashMap, /// Because of the trouble of eyeballing the legacy "metadata" field, we copied the @@ -143,6 +141,17 @@ impl IndexPart { pub(crate) fn example() -> Self { Self::empty(TimelineMetadata::example()) } + + /// Returns true if the index contains a reference to the given layer (i.e. file path). + /// + /// TODO: there should be a variant of LayerName for the physical remote path that contains + /// information about the shard and generation, to avoid passing in metadata. + pub fn references(&self, name: &LayerName, metadata: &LayerFileMetadata) -> bool { + let Some(index_metadata) = self.layer_metadata.get(name) else { + return false; + }; + is_same_remote_layer_path(name, metadata, name, index_metadata) + } } /// Metadata gathered for each of the layer files. From 4b2f56862dc5738407893df451348e78696abd65 Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 3 Jan 2025 16:16:04 +0000 Subject: [PATCH 76/89] docker: include vanilla debian postgres client (#10269) ## Problem We are chasing down segfaults in the storage controller https://github.com/neondatabase/cloud/issues/21010 This is for use by the storage controller, which links dynamically with `libpq`. We currently use the neon-built libpq, but this may be unsafe for use from multi-threaded programs like the controller, as it uses a statically linked openssl Precursor to https://github.com/neondatabase/neon/pull/10258 ## Summary of changes - Include `postgresql-15` in container builds. The reason for using version 15 is simply because that is what's available in Debian 12 without adding any extra repositories, and we don't have any special need for latest version in our libpq usage. 
--- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index e888efbae2..df9bcb3002 100644 --- a/Dockerfile +++ b/Dockerfile @@ -69,6 +69,8 @@ RUN set -e \ libreadline-dev \ libseccomp-dev \ ca-certificates \ + # System postgres for use with client libraries (e.g. in storage controller) + postgresql-15 \ && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \ && useradd -d /data neon \ && chown -R neon:neon /data From b368e62cfc374bd48ca656b476c5c081c4018546 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 4 Jan 2025 15:40:50 +0000 Subject: [PATCH 77/89] build(deps): bump jinja2 from 3.1.4 to 3.1.5 in the pip group (#10236) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 20 +++++++++++++++----- pyproject.toml | 2 +- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/poetry.lock b/poetry.lock index 59ae5cf1ca..072bf9a5e9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -1322,13 +1322,13 @@ files = [ [[package]] name = "jinja2" -version = "3.1.4" +version = "3.1.5" description = "A very fast and expressive template engine." optional = false python-versions = ">=3.7" files = [ - {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"}, - {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"}, + {file = "jinja2-3.1.5-py3-none-any.whl", hash = "sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb"}, + {file = "jinja2-3.1.5.tar.gz", hash = "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb"}, ] [package.dependencies] @@ -3309,6 +3309,16 @@ files = [ {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"}, {file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"}, {file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"}, + {file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"}, + {file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"}, + {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"}, + {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"}, + {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"}, + {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"}, + {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = 
"sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"}, + {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"}, + {file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"}, + {file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"}, {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"}, {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"}, {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"}, @@ -3524,4 +3534,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "426c385df93f578ba3537c40a269535e27fbcca1978b3cf266096ecbc298c6a9" +content-hash = "9032c11f264f2f6d8a50230e5021c606d460aafdf370da0524784c3f0f1f31b1" diff --git a/pyproject.toml b/pyproject.toml index 01d15ee6bb..ba4ab0b1f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ requests = "^2.32.3" pytest-xdist = "^3.3.1" asyncpg = "^0.29.0" aiopg = "^1.4.0" -Jinja2 = "^3.1.4" +Jinja2 = "^3.1.5" types-requests = "^2.31.0.0" types-psycopg2 = "^2.9.21.20241019" boto3 = "^1.34.11" From 406cca643b9529979522b33abf3a0457681fc987 Mon Sep 17 00:00:00 2001 From: Busra Kugler Date: Mon, 6 Jan 2025 11:44:23 +0100 Subject: [PATCH 78/89] Update neon_fixtures.py - remove logs (#10219) We need to remove this line to prevent aws keys exposing in the public s3 buckets --- test_runner/fixtures/neon_fixtures.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index a0c642163d..8fd9eec8ce 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -4606,7 +4606,8 @@ class StorageScrubber: ] args = base_args + args - log.info(f"Invoking scrubber command {args} with env: {env}") + log.info(f"Invoking scrubber command {args}") + (output_path, stdout, status_code) = subprocess_capture( self.log_dir, args, From fda52a0005ea4ca8e4e1d6a16de75724a7e619fe Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Mon, 6 Jan 2025 13:05:35 +0000 Subject: [PATCH 79/89] feat(proxy): dont trigger error alerts for unknown topics (#10266) ## Problem Before the holidays, and just before our code freeze, a change to cplane was made that started publishing the topics from #10197. This triggered our alerts and put us in a sticky situation as it was not an error, and we didn't want to silence the alert for the entire holidays, and we didn't want to release proxy 2 days in a row if it was not essential. We fixed it eventually by rewriting the alert based on logs, but this is not a good solution. ## Summary of changes Introduces an intermediate parsing step to check the topic name first, to allow us to ignore parsing errors for any topics we do not know about. 
--- proxy/src/redis/notifications.rs | 48 +++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/proxy/src/redis/notifications.rs b/proxy/src/redis/notifications.rs index 671305a300..4383d6be2c 100644 --- a/proxy/src/redis/notifications.rs +++ b/proxy/src/redis/notifications.rs @@ -30,8 +30,14 @@ async fn try_connect(client: &ConnectionWithCredentialsProvider) -> anyhow::Resu Ok(conn) } +#[derive(Debug, Deserialize)] +struct NotificationHeader<'a> { + topic: &'a str, +} + #[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)] #[serde(tag = "topic", content = "data")] +// Message to contributors: Make sure to align these topic names with the list below. pub(crate) enum Notification { #[serde( rename = "/allowed_ips_updated", @@ -69,6 +75,22 @@ pub(crate) enum Notification { #[serde(rename = "/cancel_session")] Cancel(CancelSession), } + +/// Returns true if the topic name given is a known topic that we can deserialize and action on. +/// Returns false otherwise. +fn known_topic(s: &str) -> bool { + // Message to contributors: Make sure to align these topic names with the enum above. + matches!( + s, + "/allowed_ips_updated" + | "/block_public_or_vpc_access_updated" + | "/allowed_vpc_endpoints_updated_for_org" + | "/allowed_vpc_endpoints_updated_for_projects" + | "/password_updated" + | "/cancel_session" + ) +} + #[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)] pub(crate) struct AllowedIpsUpdate { project_id: ProjectIdInt, @@ -96,6 +118,7 @@ pub(crate) struct PasswordUpdate { project_id: ProjectIdInt, role_name: RoleNameInt, } + #[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)] pub(crate) struct CancelSession { pub(crate) region_id: Option, @@ -141,18 +164,23 @@ impl MessageHandler { region_id, } } + pub(crate) async fn increment_active_listeners(&self) { self.cache.increment_active_listeners().await; } + pub(crate) async fn decrement_active_listeners(&self) { self.cache.decrement_active_listeners().await; } + #[tracing::instrument(skip(self, msg), fields(session_id = tracing::field::Empty))] async fn handle_message(&self, msg: redis::Msg) -> anyhow::Result<()> { let payload: String = msg.get_payload()?; tracing::debug!(?payload, "received a message payload"); - let msg: Notification = match serde_json::from_str(&payload) { + // For better error handling, we first parse the payload to extract the topic. + // If there's a topic we don't support, we can handle that error more gracefully. + let header: NotificationHeader = match serde_json::from_str(&payload) { Ok(msg) => msg, Err(e) => { Metrics::get().proxy.redis_errors_total.inc(RedisErrors { @@ -162,6 +190,24 @@ impl MessageHandler { return Ok(()); } }; + + if !known_topic(header.topic) { + // don't update the metric for redis errors if it's just a topic we don't know about. 
+ tracing::warn!(topic = header.topic, "unknown topic"); + return Ok(()); + } + + let msg: Notification = match serde_json::from_str(&payload) { + Ok(msg) => msg, + Err(e) => { + Metrics::get().proxy.redis_errors_total.inc(RedisErrors { + channel: msg.get_channel_name(), + }); + tracing::error!(topic = header.topic, "broken message: {e}"); + return Ok(()); + } + }; + tracing::debug!(?msg, "received a message"); match msg { Notification::Cancel(cancel_session) => { From 95f1920231465e2b898b71a9959acec9ddd63896 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Mon, 6 Jan 2025 18:27:08 +0100 Subject: [PATCH 80/89] cargo: build with frame pointers (#10226) ## Problem Frame pointers are typically disabled by default (depending on CPU architecture), to improve performance. This frees up a CPU register, and avoids a couple of instructions per function call. However, it makes stack unwinding much more inefficient, since it has to use DWARF debug information instead, and gives worse results with e.g. `perf` and eBPF profiles. The `backtrace` implementation of `libunwind` is also suspected to cause seg faults. The performance benefit of frame pointer omission doesn't appear to matter that much on modern 64-bit CPU architectures (which have plenty of registers and optimized instruction execution), and benchmarks did not show measurable overhead. The Rust standard library and jemalloc already enable frame pointers by default. For more information, see https://www.brendangregg.com/blog/2024-03-17/the-return-of-the-frame-pointers.html. Resolves #10224. Touches #10225. ## Summary of changes Enable frame pointers in all builds, and use frame pointers for pprof-rs stack sampling. --- .cargo/config.toml | 8 ++++++++ Cargo.toml | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/.cargo/config.toml b/.cargo/config.toml index 5e452974ad..20a2a929b9 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -3,6 +3,14 @@ # by the RUSTDOCFLAGS env var in CI. rustdocflags = ["-Arustdoc::private_intra_doc_links"] +# Enable frame pointers. This may have a minor performance overhead, but makes it easier and more +# efficient to obtain stack traces (and thus CPU/heap profiles). With continuous profiling, this is +# likely a net win, and allows higher profiling resolution. See also: +# +# * +# * +rustflags = ["-Cforce-frame-pointers=yes"] + [alias] build_testing = ["build", "--features", "testing"] neon = ["run", "--bin", "neon_local"] diff --git a/Cargo.toml b/Cargo.toml index 885f02ba81..197808d5ae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -135,7 +135,7 @@ parquet = { version = "53", default-features = false, features = ["zstd"] } parquet_derive = "53" pbkdf2 = { version = "0.12.1", features = ["simple", "std"] } pin-project-lite = "0.2" -pprof = { version = "0.14", features = ["criterion", "flamegraph", "protobuf", "protobuf-codec"] } +pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "protobuf", "protobuf-codec"] } procfs = "0.16" prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency prost = "0.13" @@ -266,6 +266,8 @@ tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", br [profile.release] # This is useful for profiling and, to some extent, debug. # Besides, debug info should not affect the performance. +# +# NB: we also enable frame pointers for improved profiling, see .cargo/config.toml. 
debug = true # disable debug symbols for all packages except this one to decrease binaries size From 4a6556e269018844a8c3413bd7414331cd968fce Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Mon, 6 Jan 2025 14:29:18 -0500 Subject: [PATCH 81/89] fix(pageserver): ensure GC computes time cutoff using the same start time (#10193) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem close https://github.com/neondatabase/neon/issues/10192 ## Summary of changes * `find_gc_time_cutoff` takes `now` parameter so that all branches compute the cutoff based on the same start time, avoiding races. * gc-compaction uses a single `get_gc_compaction_watermark` function to get the safe LSN to compact. --------- Signed-off-by: Alex Chi Z Co-authored-by: Arpad Müller --- pageserver/src/tenant.rs | 6 +++++- pageserver/src/tenant/timeline.rs | 5 +++-- pageserver/src/tenant/timeline/compaction.rs | 22 ++++++++++++++++++-- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 90017b25f2..e3dab2fc1d 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -4488,13 +4488,17 @@ impl Tenant { let mut gc_cutoffs: HashMap = HashMap::with_capacity(timelines.len()); + // Ensures all timelines use the same start time when computing the time cutoff. + let now_ts_for_pitr_calc = SystemTime::now(); for timeline in timelines.iter() { let cutoff = timeline .get_last_record_lsn() .checked_sub(horizon) .unwrap_or(Lsn(0)); - let cutoffs = timeline.find_gc_cutoffs(cutoff, pitr, cancel, ctx).await?; + let cutoffs = timeline + .find_gc_cutoffs(now_ts_for_pitr_calc, cutoff, pitr, cancel, ctx) + .await?; let old = gc_cutoffs.insert(timeline.timeline_id, cutoffs); assert!(old.is_none()); } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index b36c2f487f..c1b71262e0 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -4859,6 +4859,7 @@ impl Timeline { async fn find_gc_time_cutoff( &self, + now: SystemTime, pitr: Duration, cancel: &CancellationToken, ctx: &RequestContext, @@ -4866,7 +4867,6 @@ impl Timeline { debug_assert_current_span_has_tenant_and_timeline_id(); if self.shard_identity.is_shard_zero() { // Shard Zero has SLRU data and can calculate the PITR time -> LSN mapping itself - let now = SystemTime::now(); let time_range = if pitr == Duration::ZERO { humantime::parse_duration(DEFAULT_PITR_INTERVAL).expect("constant is invalid") } else { @@ -4952,6 +4952,7 @@ impl Timeline { #[instrument(skip_all, fields(timeline_id=%self.timeline_id))] pub(super) async fn find_gc_cutoffs( &self, + now: SystemTime, space_cutoff: Lsn, pitr: Duration, cancel: &CancellationToken, @@ -4979,7 +4980,7 @@ impl Timeline { // - if PITR interval is set, then this is our cutoff. // - if PITR interval is not set, then we do a lookup // based on DEFAULT_PITR_INTERVAL, so that size-based retention does not result in keeping history around permanently on idle databases. 
- let time_cutoff = self.find_gc_time_cutoff(pitr, cancel, ctx).await?; + let time_cutoff = self.find_gc_time_cutoff(now, pitr, cancel, ctx).await?; Ok(match (pitr, time_cutoff) { (Duration::ZERO, Some(time_cutoff)) => { diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 94c65631b2..55cde8603e 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -1799,6 +1799,24 @@ impl Timeline { Ok(()) } + /// Get a watermark for gc-compaction, that is the lowest LSN that we can use as the `gc_horizon` for + /// the compaction algorithm. It is min(space_cutoff, time_cutoff, latest_gc_cutoff, standby_horizon). + /// Leases and retain_lsns are considered in the gc-compaction job itself so we don't need to account for them + /// here. + pub(crate) fn get_gc_compaction_watermark(self: &Arc) -> Lsn { + let gc_cutoff_lsn = { + let gc_info = self.gc_info.read().unwrap(); + gc_info.min_cutoff() + }; + + // TODO: standby horizon should use leases so we don't really need to consider it here. + // let watermark = watermark.min(self.standby_horizon.load()); + + // TODO: ensure the child branches will not use anything below the watermark, or consider + // them when computing the watermark. + gc_cutoff_lsn.min(*self.get_latest_gc_cutoff_lsn()) + } + /// Split a gc-compaction job into multiple compaction jobs. The split is based on the key range and the estimated size of the compaction job. /// The function returns a list of compaction jobs that can be executed separately. If the upper bound of the compact LSN /// range is not specified, we will use the latest gc_cutoff as the upper bound, so that all jobs in the jobset acts @@ -1811,7 +1829,7 @@ impl Timeline { let compact_below_lsn = if job.compact_lsn_range.end != Lsn::MAX { job.compact_lsn_range.end } else { - *self.get_latest_gc_cutoff_lsn() // use the real gc cutoff + self.get_gc_compaction_watermark() }; // Split compaction job to about 4GB each @@ -2006,7 +2024,7 @@ impl Timeline { // Therefore, it can only clean up data that cannot be cleaned up with legacy gc, instead of // cleaning everything that theoritically it could. In the future, it should use `self.gc_info` // to get the truth data. - let real_gc_cutoff = *self.get_latest_gc_cutoff_lsn(); + let real_gc_cutoff = self.get_gc_compaction_watermark(); // The compaction algorithm will keep all keys above the gc_cutoff while keeping only necessary keys below the gc_cutoff for // each of the retain_lsn. Therefore, if the user-provided `compact_lsn_range.end` is larger than the real gc cutoff, we will use // the real cutoff. From b342a02b1c591642e2d52be606ccc42857af112d Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Mon, 6 Jan 2025 21:17:43 +0100 Subject: [PATCH 82/89] Dockerfile: build with `force-frame-pointers=yes` (#10286) See https://github.com/neondatabase/neon/pull/10226#issuecomment-2573725182. --- .cargo/config.toml | 6 ++++-- Dockerfile | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.cargo/config.toml b/.cargo/config.toml index 20a2a929b9..c71d491303 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -4,11 +4,13 @@ rustdocflags = ["-Arustdoc::private_intra_doc_links"] # Enable frame pointers. This may have a minor performance overhead, but makes it easier and more -# efficient to obtain stack traces (and thus CPU/heap profiles). With continuous profiling, this is -# likely a net win, and allows higher profiling resolution. 
See also: +# efficient to obtain stack traces (and thus CPU/heap profiles). It may also avoid seg faults that +# we've seen with libunwind-based profiling. See also: # # * # * +# +# NB: the RUSTFLAGS envvar will replace this. Make sure to update e.g. Dockerfile as well. rustflags = ["-Cforce-frame-pointers=yes"] [alias] diff --git a/Dockerfile b/Dockerfile index df9bcb3002..2c157b3b2a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -45,7 +45,7 @@ COPY --chown=nonroot . . ARG ADDITIONAL_RUSTFLAGS RUN set -e \ - && PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment ${ADDITIONAL_RUSTFLAGS}" cargo build \ + && PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo build \ --bin pg_sni_router \ --bin pageserver \ --bin pagectl \ From ad7f14d526a4e7e5195ca5d8651672aae0c96d93 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Mon, 6 Jan 2025 20:25:31 +0000 Subject: [PATCH 83/89] test_runner: update packages for Python 3.13 (#10285) ## Problem It's impossible to run regression tests with Python 3.13 as some dependencies don't support it (some of them are outdated, and `jsonnet` doesn't support it at all yet) ## Summary of changes - Update dependencies for Python 3.13 - Install `jsonnet` only on Python < 3.13 and skip relevant tests on Python 3.13 Closes #10237 --- poetry.lock | 771 +++++++++++--------- pyproject.toml | 11 +- test_runner/regress/test_compute_metrics.py | 9 +- 3 files changed, 459 insertions(+), 332 deletions(-) diff --git a/poetry.lock b/poetry.lock index 072bf9a5e9..5f15223dca 100644 --- a/poetry.lock +++ b/poetry.lock @@ -239,60 +239,66 @@ files = [ [[package]] name = "asyncpg" -version = "0.29.0" +version = "0.30.0" description = "An asyncio PostgreSQL driver" optional = false python-versions = ">=3.8.0" files = [ - {file = "asyncpg-0.29.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72fd0ef9f00aeed37179c62282a3d14262dbbafb74ec0ba16e1b1864d8a12169"}, - {file = "asyncpg-0.29.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:52e8f8f9ff6e21f9b39ca9f8e3e33a5fcdceaf5667a8c5c32bee158e313be385"}, - {file = "asyncpg-0.29.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9e6823a7012be8b68301342ba33b4740e5a166f6bbda0aee32bc01638491a22"}, - {file = "asyncpg-0.29.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:746e80d83ad5d5464cfbf94315eb6744222ab00aa4e522b704322fb182b83610"}, - {file = "asyncpg-0.29.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:ff8e8109cd6a46ff852a5e6bab8b0a047d7ea42fcb7ca5ae6eaae97d8eacf397"}, - {file = "asyncpg-0.29.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:97eb024685b1d7e72b1972863de527c11ff87960837919dac6e34754768098eb"}, - {file = "asyncpg-0.29.0-cp310-cp310-win32.whl", hash = "sha256:5bbb7f2cafd8d1fa3e65431833de2642f4b2124be61a449fa064e1a08d27e449"}, - {file = "asyncpg-0.29.0-cp310-cp310-win_amd64.whl", hash = "sha256:76c3ac6530904838a4b650b2880f8e7af938ee049e769ec2fba7cd66469d7772"}, - {file = "asyncpg-0.29.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4900ee08e85af01adb207519bb4e14b1cae8fd21e0ccf80fac6aa60b6da37b4"}, - {file = "asyncpg-0.29.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a65c1dcd820d5aea7c7d82a3fdcb70e096f8f70d1a8bf93eb458e49bfad036ac"}, - {file = 
"asyncpg-0.29.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b52e46f165585fd6af4863f268566668407c76b2c72d366bb8b522fa66f1870"}, - {file = "asyncpg-0.29.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc600ee8ef3dd38b8d67421359779f8ccec30b463e7aec7ed481c8346decf99f"}, - {file = "asyncpg-0.29.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:039a261af4f38f949095e1e780bae84a25ffe3e370175193174eb08d3cecab23"}, - {file = "asyncpg-0.29.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6feaf2d8f9138d190e5ec4390c1715c3e87b37715cd69b2c3dfca616134efd2b"}, - {file = "asyncpg-0.29.0-cp311-cp311-win32.whl", hash = "sha256:1e186427c88225ef730555f5fdda6c1812daa884064bfe6bc462fd3a71c4b675"}, - {file = "asyncpg-0.29.0-cp311-cp311-win_amd64.whl", hash = "sha256:cfe73ffae35f518cfd6e4e5f5abb2618ceb5ef02a2365ce64f132601000587d3"}, - {file = "asyncpg-0.29.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6011b0dc29886ab424dc042bf9eeb507670a3b40aece3439944006aafe023178"}, - {file = "asyncpg-0.29.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b544ffc66b039d5ec5a7454667f855f7fec08e0dfaf5a5490dfafbb7abbd2cfb"}, - {file = "asyncpg-0.29.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d84156d5fb530b06c493f9e7635aa18f518fa1d1395ef240d211cb563c4e2364"}, - {file = "asyncpg-0.29.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54858bc25b49d1114178d65a88e48ad50cb2b6f3e475caa0f0c092d5f527c106"}, - {file = "asyncpg-0.29.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bde17a1861cf10d5afce80a36fca736a86769ab3579532c03e45f83ba8a09c59"}, - {file = "asyncpg-0.29.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:37a2ec1b9ff88d8773d3eb6d3784dc7e3fee7756a5317b67f923172a4748a175"}, - {file = "asyncpg-0.29.0-cp312-cp312-win32.whl", hash = "sha256:bb1292d9fad43112a85e98ecdc2e051602bce97c199920586be83254d9dafc02"}, - {file = "asyncpg-0.29.0-cp312-cp312-win_amd64.whl", hash = "sha256:2245be8ec5047a605e0b454c894e54bf2ec787ac04b1cb7e0d3c67aa1e32f0fe"}, - {file = "asyncpg-0.29.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0009a300cae37b8c525e5b449233d59cd9868fd35431abc470a3e364d2b85cb9"}, - {file = "asyncpg-0.29.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5cad1324dbb33f3ca0cd2074d5114354ed3be2b94d48ddfd88af75ebda7c43cc"}, - {file = "asyncpg-0.29.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:012d01df61e009015944ac7543d6ee30c2dc1eb2f6b10b62a3f598beb6531548"}, - {file = "asyncpg-0.29.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:000c996c53c04770798053e1730d34e30cb645ad95a63265aec82da9093d88e7"}, - {file = "asyncpg-0.29.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e0bfe9c4d3429706cf70d3249089de14d6a01192d617e9093a8e941fea8ee775"}, - {file = "asyncpg-0.29.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:642a36eb41b6313ffa328e8a5c5c2b5bea6ee138546c9c3cf1bffaad8ee36dd9"}, - {file = "asyncpg-0.29.0-cp38-cp38-win32.whl", hash = "sha256:a921372bbd0aa3a5822dd0409da61b4cd50df89ae85150149f8c119f23e8c408"}, - {file = "asyncpg-0.29.0-cp38-cp38-win_amd64.whl", hash = "sha256:103aad2b92d1506700cbf51cd8bb5441e7e72e87a7b3a2ca4e32c840f051a6a3"}, - {file = "asyncpg-0.29.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5340dd515d7e52f4c11ada32171d87c05570479dc01dc66d03ee3e150fb695da"}, - {file = "asyncpg-0.29.0-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:e17b52c6cf83e170d3d865571ba574577ab8e533e7361a2b8ce6157d02c665d3"}, - {file = "asyncpg-0.29.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f100d23f273555f4b19b74a96840aa27b85e99ba4b1f18d4ebff0734e78dc090"}, - {file = "asyncpg-0.29.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48e7c58b516057126b363cec8ca02b804644fd012ef8e6c7e23386b7d5e6ce83"}, - {file = "asyncpg-0.29.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f9ea3f24eb4c49a615573724d88a48bd1b7821c890c2effe04f05382ed9e8810"}, - {file = "asyncpg-0.29.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8d36c7f14a22ec9e928f15f92a48207546ffe68bc412f3be718eedccdf10dc5c"}, - {file = "asyncpg-0.29.0-cp39-cp39-win32.whl", hash = "sha256:797ab8123ebaed304a1fad4d7576d5376c3a006a4100380fb9d517f0b59c1ab2"}, - {file = "asyncpg-0.29.0-cp39-cp39-win_amd64.whl", hash = "sha256:cce08a178858b426ae1aa8409b5cc171def45d4293626e7aa6510696d46decd8"}, - {file = "asyncpg-0.29.0.tar.gz", hash = "sha256:d1c49e1f44fffafd9a55e1a9b101590859d881d639ea2922516f5d9c512d354e"}, + {file = "asyncpg-0.30.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bfb4dd5ae0699bad2b233672c8fc5ccbd9ad24b89afded02341786887e37927e"}, + {file = "asyncpg-0.30.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dc1f62c792752a49f88b7e6f774c26077091b44caceb1983509edc18a2222ec0"}, + {file = "asyncpg-0.30.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3152fef2e265c9c24eec4ee3d22b4f4d2703d30614b0b6753e9ed4115c8a146f"}, + {file = "asyncpg-0.30.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7255812ac85099a0e1ffb81b10dc477b9973345793776b128a23e60148dd1af"}, + {file = "asyncpg-0.30.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:578445f09f45d1ad7abddbff2a3c7f7c291738fdae0abffbeb737d3fc3ab8b75"}, + {file = "asyncpg-0.30.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c42f6bb65a277ce4d93f3fba46b91a265631c8df7250592dd4f11f8b0152150f"}, + {file = "asyncpg-0.30.0-cp310-cp310-win32.whl", hash = "sha256:aa403147d3e07a267ada2ae34dfc9324e67ccc4cdca35261c8c22792ba2b10cf"}, + {file = "asyncpg-0.30.0-cp310-cp310-win_amd64.whl", hash = "sha256:fb622c94db4e13137c4c7f98834185049cc50ee01d8f657ef898b6407c7b9c50"}, + {file = "asyncpg-0.30.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5e0511ad3dec5f6b4f7a9e063591d407eee66b88c14e2ea636f187da1dcfff6a"}, + {file = "asyncpg-0.30.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:915aeb9f79316b43c3207363af12d0e6fd10776641a7de8a01212afd95bdf0ed"}, + {file = "asyncpg-0.30.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c198a00cce9506fcd0bf219a799f38ac7a237745e1d27f0e1f66d3707c84a5a"}, + {file = "asyncpg-0.30.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3326e6d7381799e9735ca2ec9fd7be4d5fef5dcbc3cb555d8a463d8460607956"}, + {file = "asyncpg-0.30.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:51da377487e249e35bd0859661f6ee2b81db11ad1f4fc036194bc9cb2ead5056"}, + {file = "asyncpg-0.30.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc6d84136f9c4d24d358f3b02be4b6ba358abd09f80737d1ac7c444f36108454"}, + {file = "asyncpg-0.30.0-cp311-cp311-win32.whl", hash = "sha256:574156480df14f64c2d76450a3f3aaaf26105869cad3865041156b38459e935d"}, + {file = "asyncpg-0.30.0-cp311-cp311-win_amd64.whl", hash = "sha256:3356637f0bd830407b5597317b3cb3571387ae52ddc3bca6233682be88bbbc1f"}, + {file = "asyncpg-0.30.0-cp312-cp312-macosx_10_13_x86_64.whl", 
hash = "sha256:c902a60b52e506d38d7e80e0dd5399f657220f24635fee368117b8b5fce1142e"}, + {file = "asyncpg-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aca1548e43bbb9f0f627a04666fedaca23db0a31a84136ad1f868cb15deb6e3a"}, + {file = "asyncpg-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c2a2ef565400234a633da0eafdce27e843836256d40705d83ab7ec42074efb3"}, + {file = "asyncpg-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1292b84ee06ac8a2ad8e51c7475aa309245874b61333d97411aab835c4a2f737"}, + {file = "asyncpg-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f5712350388d0cd0615caec629ad53c81e506b1abaaf8d14c93f54b35e3595a"}, + {file = "asyncpg-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:db9891e2d76e6f425746c5d2da01921e9a16b5a71a1c905b13f30e12a257c4af"}, + {file = "asyncpg-0.30.0-cp312-cp312-win32.whl", hash = "sha256:68d71a1be3d83d0570049cd1654a9bdfe506e794ecc98ad0873304a9f35e411e"}, + {file = "asyncpg-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a0292c6af5c500523949155ec17b7fe01a00ace33b68a476d6b5059f9630305"}, + {file = "asyncpg-0.30.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05b185ebb8083c8568ea8a40e896d5f7af4b8554b64d7719c0eaa1eb5a5c3a70"}, + {file = "asyncpg-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c47806b1a8cbb0a0db896f4cd34d89942effe353a5035c62734ab13b9f938da3"}, + {file = "asyncpg-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b6fde867a74e8c76c71e2f64f80c64c0f3163e687f1763cfaf21633ec24ec33"}, + {file = "asyncpg-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46973045b567972128a27d40001124fbc821c87a6cade040cfcd4fa8a30bcdc4"}, + {file = "asyncpg-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9110df111cabc2ed81aad2f35394a00cadf4f2e0635603db6ebbd0fc896f46a4"}, + {file = "asyncpg-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04ff0785ae7eed6cc138e73fc67b8e51d54ee7a3ce9b63666ce55a0bf095f7ba"}, + {file = "asyncpg-0.30.0-cp313-cp313-win32.whl", hash = "sha256:ae374585f51c2b444510cdf3595b97ece4f233fde739aa14b50e0d64e8a7a590"}, + {file = "asyncpg-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:f59b430b8e27557c3fb9869222559f7417ced18688375825f8f12302c34e915e"}, + {file = "asyncpg-0.30.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:29ff1fc8b5bf724273782ff8b4f57b0f8220a1b2324184846b39d1ab4122031d"}, + {file = "asyncpg-0.30.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:64e899bce0600871b55368b8483e5e3e7f1860c9482e7f12e0a771e747988168"}, + {file = "asyncpg-0.30.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b290f4726a887f75dcd1b3006f484252db37602313f806e9ffc4e5996cfe5cb"}, + {file = "asyncpg-0.30.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f86b0e2cd3f1249d6fe6fd6cfe0cd4538ba994e2d8249c0491925629b9104d0f"}, + {file = "asyncpg-0.30.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:393af4e3214c8fa4c7b86da6364384c0d1b3298d45803375572f415b6f673f38"}, + {file = "asyncpg-0.30.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:fd4406d09208d5b4a14db9a9dbb311b6d7aeeab57bded7ed2f8ea41aeef39b34"}, + {file = "asyncpg-0.30.0-cp38-cp38-win32.whl", hash = "sha256:0b448f0150e1c3b96cb0438a0d0aa4871f1472e58de14a3ec320dbb2798fb0d4"}, + {file = "asyncpg-0.30.0-cp38-cp38-win_amd64.whl", hash = "sha256:f23b836dd90bea21104f69547923a02b167d999ce053f3d502081acea2fba15b"}, + {file = 
"asyncpg-0.30.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6f4e83f067b35ab5e6371f8a4c93296e0439857b4569850b178a01385e82e9ad"}, + {file = "asyncpg-0.30.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5df69d55add4efcd25ea2a3b02025b669a285b767bfbf06e356d68dbce4234ff"}, + {file = "asyncpg-0.30.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3479a0d9a852c7c84e822c073622baca862d1217b10a02dd57ee4a7a081f708"}, + {file = "asyncpg-0.30.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26683d3b9a62836fad771a18ecf4659a30f348a561279d6227dab96182f46144"}, + {file = "asyncpg-0.30.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1b982daf2441a0ed314bd10817f1606f1c28b1136abd9e4f11335358c2c631cb"}, + {file = "asyncpg-0.30.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1c06a3a50d014b303e5f6fc1e5f95eb28d2cee89cf58384b700da621e5d5e547"}, + {file = "asyncpg-0.30.0-cp39-cp39-win32.whl", hash = "sha256:1b11a555a198b08f5c4baa8f8231c74a366d190755aa4f99aacec5970afe929a"}, + {file = "asyncpg-0.30.0-cp39-cp39-win_amd64.whl", hash = "sha256:8b684a3c858a83cd876f05958823b68e8d14ec01bb0c0d14a6704c5bf9711773"}, + {file = "asyncpg-0.30.0.tar.gz", hash = "sha256:c551e9928ab6707602f44811817f82ba3c446e018bfe1d3abecc8ba5f3eac851"}, ] -[package.dependencies] -async-timeout = {version = ">=4.0.3", markers = "python_version < \"3.12.0\""} - [package.extras] -docs = ["Sphinx (>=5.3.0,<5.4.0)", "sphinx-rtd-theme (>=1.2.2)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] -test = ["flake8 (>=6.1,<7.0)", "uvloop (>=0.15.3)"] +docs = ["Sphinx (>=8.1.3,<8.2.0)", "sphinx-rtd-theme (>=1.2.2)"] +gssauth = ["gssapi", "sspilib"] +test = ["distro (>=1.9.0,<1.10.0)", "flake8 (>=6.1,<7.0)", "flake8-pyi (>=24.1.0,<24.2.0)", "gssapi", "k5test", "mypy (>=1.8.0,<1.9.0)", "sspilib", "uvloop (>=0.15.3)"] [[package]] name = "attrs" @@ -766,75 +772,78 @@ files = [ [[package]] name = "cffi" -version = "1.15.1" +version = "1.17.1" description = "Foreign Function Interface for Python calling C code." 
optional = false -python-versions = "*" +python-versions = ">=3.8" files = [ - {file = "cffi-1.15.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2"}, - {file = "cffi-1.15.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2"}, - {file = "cffi-1.15.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:9ad5db27f9cabae298d151c85cf2bad1d359a1b9c686a275df03385758e2f914"}, - {file = "cffi-1.15.1-cp27-cp27m-win32.whl", hash = "sha256:b3bbeb01c2b273cca1e1e0c5df57f12dce9a4dd331b4fa1635b8bec26350bde3"}, - {file = "cffi-1.15.1-cp27-cp27m-win_amd64.whl", hash = "sha256:e00b098126fd45523dd056d2efba6c5a63b71ffe9f2bbe1a4fe1716e1d0c331e"}, - {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:d61f4695e6c866a23a21acab0509af1cdfd2c013cf256bbf5b6b5e2695827162"}, - {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b"}, - {file = "cffi-1.15.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:39d39875251ca8f612b6f33e6b1195af86d1b3e60086068be9cc053aa4376e21"}, - {file = "cffi-1.15.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:285d29981935eb726a4399badae8f0ffdff4f5050eaa6d0cfc3f64b857b77185"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3eb6971dcff08619f8d91607cfc726518b6fa2a9eba42856be181c6d0d9515fd"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21157295583fe8943475029ed5abdcf71eb3911894724e360acff1d61c1d54bc"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5635bd9cb9731e6d4a1132a498dd34f764034a8ce60cef4f5319c0541159392f"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2012c72d854c2d03e45d06ae57f40d78e5770d252f195b93f581acf3ba44496e"}, - {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4"}, - {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01"}, - {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:59c0b02d0a6c384d453fece7566d1c7e6b7bae4fc5874ef2ef46d56776d61c9e"}, - {file = "cffi-1.15.1-cp310-cp310-win32.whl", hash = "sha256:cba9d6b9a7d64d4bd46167096fc9d2f835e25d7e4c121fb2ddfc6528fb0413b2"}, - {file = "cffi-1.15.1-cp310-cp310-win_amd64.whl", hash = "sha256:ce4bcc037df4fc5e3d184794f27bdaab018943698f4ca31630bc7f84a7b69c6d"}, - {file = "cffi-1.15.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3d08afd128ddaa624a48cf2b859afef385b720bb4b43df214f85616922e6a5ac"}, - {file = "cffi-1.15.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3799aecf2e17cf585d977b780ce79ff0dc9b78d799fc694221ce814c2c19db83"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a591fe9e525846e4d154205572a029f653ada1a78b93697f3b5a8f1f2bc055b9"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3548db281cd7d2561c9ad9984681c95f7b0e38881201e157833a2342c30d5e8c"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:91fc98adde3d7881af9b59ed0294046f3806221863722ba7d8d120c575314325"}, - {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c"}, - {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:03425bdae262c76aad70202debd780501fabeaca237cdfddc008987c0e0f59ef"}, - {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cc4d65aeeaa04136a12677d3dd0b1c0c94dc43abac5860ab33cceb42b801c1e8"}, - {file = "cffi-1.15.1-cp311-cp311-win32.whl", hash = "sha256:a0f100c8912c114ff53e1202d0078b425bee3649ae34d7b070e9697f93c5d52d"}, - {file = "cffi-1.15.1-cp311-cp311-win_amd64.whl", hash = "sha256:04ed324bda3cda42b9b695d51bb7d54b680b9719cfab04227cdd1e04e5de3104"}, - {file = "cffi-1.15.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50a74364d85fd319352182ef59c5c790484a336f6db772c1a9231f1c3ed0cbd7"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e263d77ee3dd201c3a142934a086a4450861778baaeeb45db4591ef65550b0a6"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cec7d9412a9102bdc577382c3929b337320c4c4c4849f2c5cdd14d7368c5562d"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4289fc34b2f5316fbb762d75362931e351941fa95fa18789191b33fc4cf9504a"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:173379135477dc8cac4bc58f45db08ab45d228b3363adb7af79436135d028405"}, - {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6975a3fac6bc83c4a65c9f9fcab9e47019a11d3d2cf7f3c0d03431bf145a941e"}, - {file = "cffi-1.15.1-cp36-cp36m-win32.whl", hash = "sha256:2470043b93ff09bf8fb1d46d1cb756ce6132c54826661a32d4e4d132e1977adf"}, - {file = "cffi-1.15.1-cp36-cp36m-win_amd64.whl", hash = "sha256:30d78fbc8ebf9c92c9b7823ee18eb92f2e6ef79b45ac84db507f52fbe3ec4497"}, - {file = "cffi-1.15.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:198caafb44239b60e252492445da556afafc7d1e3ab7a1fb3f0584ef6d742375"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5ef34d190326c3b1f822a5b7a45f6c4535e2f47ed06fec77d3d799c450b2651e"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8102eaf27e1e448db915d08afa8b41d6c7ca7a04b7d73af6514df10a3e74bd82"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5df2768244d19ab7f60546d0c7c63ce1581f7af8b5de3eb3004b9b6fc8a9f84b"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8c4917bd7ad33e8eb21e9a5bbba979b49d9a97acb3a803092cbc1133e20343c"}, - {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2642fe3142e4cc4af0799748233ad6da94c62a8bec3a6648bf8ee68b1c7426"}, - {file = "cffi-1.15.1-cp37-cp37m-win32.whl", hash = "sha256:e229a521186c75c8ad9490854fd8bbdd9a0c9aa3a524326b55be83b54d4e0ad9"}, - {file = "cffi-1.15.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a0b71b1b8fbf2b96e41c4d990244165e2c9be83d54962a9a1d118fd8657d2045"}, - {file = "cffi-1.15.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:320dab6e7cb2eacdf0e658569d2575c4dad258c0fcc794f46215e1e39f90f2c3"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:1e74c6b51a9ed6589199c787bf5f9875612ca4a8a0785fb2d4a84429badaf22a"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5c84c68147988265e60416b57fc83425a78058853509c1b0629c180094904a5"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b926aa83d1edb5aa5b427b4053dc420ec295a08e40911296b9eb1b6170f6cca"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87c450779d0914f2861b8526e035c5e6da0a3199d8f1add1a665e1cbc6fc6d02"}, - {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192"}, - {file = "cffi-1.15.1-cp38-cp38-win32.whl", hash = "sha256:8b7ee99e510d7b66cdb6c593f21c043c248537a32e0bedf02e01e9553a172314"}, - {file = "cffi-1.15.1-cp38-cp38-win_amd64.whl", hash = "sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5"}, - {file = "cffi-1.15.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:54a2db7b78338edd780e7ef7f9f6c442500fb0d41a5a4ea24fff1c929d5af585"}, - {file = "cffi-1.15.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7473e861101c9e72452f9bf8acb984947aa1661a7704553a9f6e4baa5ba64415"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c9a799e985904922a4d207a94eae35c78ebae90e128f0c4e521ce339396be9d"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3bcde07039e586f91b45c88f8583ea7cf7a0770df3a1649627bf598332cb6984"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33ab79603146aace82c2427da5ca6e58f2b3f2fb5da893ceac0c42218a40be35"}, - {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27"}, - {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db0fbb9c62743ce59a9ff687eb5f4afbe77e5e8403d6697f7446e5f609976f76"}, - {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:98d85c6a2bef81588d9227dde12db8a7f47f639f4a17c9ae08e773aa9c697bf3"}, - {file = "cffi-1.15.1-cp39-cp39-win32.whl", hash = "sha256:40f4774f5a9d4f5e344f31a32b5096977b5d48560c5592e2f3d2c4374bd543ee"}, - {file = "cffi-1.15.1-cp39-cp39-win_amd64.whl", hash = "sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c"}, - {file = "cffi-1.15.1.tar.gz", hash = "sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9"}, + {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"}, + {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17"}, + {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8"}, + {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e"}, + {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be"}, + {file = "cffi-1.17.1-cp310-cp310-win32.whl", hash = "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c"}, + {file = "cffi-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15"}, + {file = "cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401"}, + {file = "cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d"}, + {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6"}, + {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f"}, + {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b"}, + {file = "cffi-1.17.1-cp311-cp311-win32.whl", hash = "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655"}, + {file = "cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0"}, + {file = "cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4"}, + {file = "cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93"}, + {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3"}, + {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8"}, + {file = "cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65"}, + {file = "cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903"}, + {file = "cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e"}, + {file = "cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd"}, + {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed"}, + {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9"}, + {file = "cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d"}, + {file = "cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a"}, + {file = "cffi-1.17.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc"}, + {file = 
"cffi-1.17.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1"}, + {file = "cffi-1.17.1-cp38-cp38-win32.whl", hash = "sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8"}, + {file = "cffi-1.17.1-cp38-cp38-win_amd64.whl", hash = "sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1"}, + {file = "cffi-1.17.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16"}, + {file = "cffi-1.17.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3"}, + {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595"}, + {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a"}, + {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e"}, + {file = "cffi-1.17.1-cp39-cp39-win32.whl", hash = "sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7"}, + {file = "cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662"}, + {file = "cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824"}, ] [package.dependencies] @@ -1114,72 +1123,103 @@ Flask = ">=0.9" [[package]] name = "frozenlist" -version = "1.4.0" +version = "1.5.0" description = "A list-like structure which implements collections.abc.MutableSequence" optional = false python-versions = ">=3.8" files = [ - {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:764226ceef3125e53ea2cb275000e309c0aa5464d43bd72abd661e27fffc26ab"}, - {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d6484756b12f40003c6128bfcc3fa9f0d49a687e171186c2d85ec82e3758c559"}, - {file = "frozenlist-1.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9ac08e601308e41eb533f232dbf6b7e4cea762f9f84f6357136eed926c15d12c"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d081f13b095d74b67d550de04df1c756831f3b83dc9881c38985834387487f1b"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:71932b597f9895f011f47f17d6428252fc728ba2ae6024e13c3398a087c2cdea"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:981b9ab5a0a3178ff413bca62526bb784249421c24ad7381e39d67981be2c326"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e41f3de4df3e80de75845d3e743b3f1c4c8613c3997a912dbf0229fc61a8b963"}, - {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6918d49b1f90821e93069682c06ffde41829c346c66b721e65a5c62b4bab0300"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0e5c8764c7829343d919cc2dfc587a8db01c4f70a4ebbc49abde5d4b158b007b"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8d0edd6b1c7fb94922bf569c9b092ee187a83f03fb1a63076e7774b60f9481a8"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e29cda763f752553fa14c68fb2195150bfab22b352572cb36c43c47bedba70eb"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:0c7c1b47859ee2cac3846fde1c1dc0f15da6cec5a0e5c72d101e0f83dcb67ff9"}, - {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:901289d524fdd571be1c7be054f48b1f88ce8dddcbdf1ec698b27d4b8b9e5d62"}, - {file = "frozenlist-1.4.0-cp310-cp310-win32.whl", hash = "sha256:1a0848b52815006ea6596c395f87449f693dc419061cc21e970f139d466dc0a0"}, - {file = "frozenlist-1.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:b206646d176a007466358aa21d85cd8600a415c67c9bd15403336c331a10d956"}, - {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:de343e75f40e972bae1ef6090267f8260c1446a1695e77096db6cfa25e759a95"}, - {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad2a9eb6d9839ae241701d0918f54c51365a51407fd80f6b8289e2dfca977cc3"}, - {file = "frozenlist-1.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd7bd3b3830247580de99c99ea2a01416dfc3c34471ca1298bccabf86d0ff4dc"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdf1847068c362f16b353163391210269e4f0569a3c166bc6a9f74ccbfc7e839"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:38461d02d66de17455072c9ba981d35f1d2a73024bee7790ac2f9e361ef1cd0c"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5a32087d720c608f42caed0ef36d2b3ea61a9d09ee59a5142d6070da9041b8f"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dd65632acaf0d47608190a71bfe46b209719bf2beb59507db08ccdbe712f969b"}, - {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261b9f5d17cac914531331ff1b1d452125bf5daa05faf73b71d935485b0c510b"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b89ac9768b82205936771f8d2eb3ce88503b1556324c9f903e7156669f521472"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:008eb8b31b3ea6896da16c38c1b136cb9fec9e249e77f6211d479db79a4eaf01"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e74b0506fa5aa5598ac6a975a12aa8928cbb58e1f5ac8360792ef15de1aa848f"}, - {file = 
"frozenlist-1.4.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:490132667476f6781b4c9458298b0c1cddf237488abd228b0b3650e5ecba7467"}, - {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:76d4711f6f6d08551a7e9ef28c722f4a50dd0fc204c56b4bcd95c6cc05ce6fbb"}, - {file = "frozenlist-1.4.0-cp311-cp311-win32.whl", hash = "sha256:a02eb8ab2b8f200179b5f62b59757685ae9987996ae549ccf30f983f40602431"}, - {file = "frozenlist-1.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:515e1abc578dd3b275d6a5114030b1330ba044ffba03f94091842852f806f1c1"}, - {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f0ed05f5079c708fe74bf9027e95125334b6978bf07fd5ab923e9e55e5fbb9d3"}, - {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ca265542ca427bf97aed183c1676e2a9c66942e822b14dc6e5f42e038f92a503"}, - {file = "frozenlist-1.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:491e014f5c43656da08958808588cc6c016847b4360e327a62cb308c791bd2d9"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17ae5cd0f333f94f2e03aaf140bb762c64783935cc764ff9c82dff626089bebf"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e78fb68cf9c1a6aa4a9a12e960a5c9dfbdb89b3695197aa7064705662515de2"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5655a942f5f5d2c9ed93d72148226d75369b4f6952680211972a33e59b1dfdc"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c11b0746f5d946fecf750428a95f3e9ebe792c1ee3b1e96eeba145dc631a9672"}, - {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e66d2a64d44d50d2543405fb183a21f76b3b5fd16f130f5c99187c3fb4e64919"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:88f7bc0fcca81f985f78dd0fa68d2c75abf8272b1f5c323ea4a01a4d7a614efc"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5833593c25ac59ede40ed4de6d67eb42928cca97f26feea219f21d0ed0959b79"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:fec520865f42e5c7f050c2a79038897b1c7d1595e907a9e08e3353293ffc948e"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:b826d97e4276750beca7c8f0f1a4938892697a6bcd8ec8217b3312dad6982781"}, - {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ceb6ec0a10c65540421e20ebd29083c50e6d1143278746a4ef6bcf6153171eb8"}, - {file = "frozenlist-1.4.0-cp38-cp38-win32.whl", hash = "sha256:2b8bcf994563466db019fab287ff390fffbfdb4f905fc77bc1c1d604b1c689cc"}, - {file = "frozenlist-1.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:a6c8097e01886188e5be3e6b14e94ab365f384736aa1fca6a0b9e35bd4a30bc7"}, - {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6c38721585f285203e4b4132a352eb3daa19121a035f3182e08e437cface44bf"}, - {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a0c6da9aee33ff0b1a451e867da0c1f47408112b3391dd43133838339e410963"}, - {file = "frozenlist-1.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93ea75c050c5bb3d98016b4ba2497851eadf0ac154d88a67d7a6816206f6fa7f"}, - {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f61e2dc5ad442c52b4887f1fdc112f97caeff4d9e6ebe78879364ac59f1663e1"}, - 
{file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa384489fefeb62321b238e64c07ef48398fe80f9e1e6afeff22e140e0850eef"}, - {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:10ff5faaa22786315ef57097a279b833ecab1a0bfb07d604c9cbb1c4cdc2ed87"}, - {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:007df07a6e3eb3e33e9a1fe6a9db7af152bbd8a185f9aaa6ece10a3529e3e1c6"}, - {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f4f399d28478d1f604c2ff9119907af9726aed73680e5ed1ca634d377abb087"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c5374b80521d3d3f2ec5572e05adc94601985cc526fb276d0c8574a6d749f1b3"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ce31ae3e19f3c902de379cf1323d90c649425b86de7bbdf82871b8a2a0615f3d"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7211ef110a9194b6042449431e08c4d80c0481e5891e58d429df5899690511c2"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:556de4430ce324c836789fa4560ca62d1591d2538b8ceb0b4f68fb7b2384a27a"}, - {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7645a8e814a3ee34a89c4a372011dcd817964ce8cb273c8ed6119d706e9613e3"}, - {file = "frozenlist-1.4.0-cp39-cp39-win32.whl", hash = "sha256:19488c57c12d4e8095a922f328df3f179c820c212940a498623ed39160bc3c2f"}, - {file = "frozenlist-1.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:6221d84d463fb110bdd7619b69cb43878a11d51cbb9394ae3105d082d5199167"}, - {file = "frozenlist-1.4.0.tar.gz", hash = "sha256:09163bdf0b2907454042edb19f887c6d33806adc71fbd54afc14908bfdc22251"}, + {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5b6a66c18b5b9dd261ca98dffcb826a525334b2f29e7caa54e182255c5f6a65a"}, + {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d1b3eb7b05ea246510b43a7e53ed1653e55c2121019a97e60cad7efb881a97bb"}, + {file = "frozenlist-1.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:15538c0cbf0e4fa11d1e3a71f823524b0c46299aed6e10ebb4c2089abd8c3bec"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e79225373c317ff1e35f210dd5f1344ff31066ba8067c307ab60254cd3a78ad5"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9272fa73ca71266702c4c3e2d4a28553ea03418e591e377a03b8e3659d94fa76"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:498524025a5b8ba81695761d78c8dd7382ac0b052f34e66939c42df860b8ff17"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:92b5278ed9d50fe610185ecd23c55d8b307d75ca18e94c0e7de328089ac5dcba"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f3c8c1dacd037df16e85227bac13cca58c30da836c6f936ba1df0c05d046d8d"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f2ac49a9bedb996086057b75bf93538240538c6d9b38e57c82d51f75a73409d2"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e66cc454f97053b79c2ab09c17fbe3c825ea6b4de20baf1be28919460dd7877f"}, 
+ {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:5a3ba5f9a0dfed20337d3e966dc359784c9f96503674c2faf015f7fe8e96798c"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6321899477db90bdeb9299ac3627a6a53c7399c8cd58d25da094007402b039ab"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:76e4753701248476e6286f2ef492af900ea67d9706a0155335a40ea21bf3b2f5"}, + {file = "frozenlist-1.5.0-cp310-cp310-win32.whl", hash = "sha256:977701c081c0241d0955c9586ffdd9ce44f7a7795df39b9151cd9a6fd0ce4cfb"}, + {file = "frozenlist-1.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:189f03b53e64144f90990d29a27ec4f7997d91ed3d01b51fa39d2dbe77540fd4"}, + {file = "frozenlist-1.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:fd74520371c3c4175142d02a976aee0b4cb4a7cc912a60586ffd8d5929979b30"}, + {file = "frozenlist-1.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2f3f7a0fbc219fb4455264cae4d9f01ad41ae6ee8524500f381de64ffaa077d5"}, + {file = "frozenlist-1.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f47c9c9028f55a04ac254346e92977bf0f166c483c74b4232bee19a6697e4778"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0996c66760924da6e88922756d99b47512a71cfd45215f3570bf1e0b694c206a"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a2fe128eb4edeabe11896cb6af88fca5346059f6c8d807e3b910069f39157869"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1a8ea951bbb6cacd492e3948b8da8c502a3f814f5d20935aae74b5df2b19cf3d"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:de537c11e4aa01d37db0d403b57bd6f0546e71a82347a97c6a9f0dcc532b3a45"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c2623347b933fcb9095841f1cc5d4ff0b278addd743e0e966cb3d460278840d"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cee6798eaf8b1416ef6909b06f7dc04b60755206bddc599f52232606e18179d3"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f5f9da7f5dbc00a604fe74aa02ae7c98bcede8a3b8b9666f9f86fc13993bc71a"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:90646abbc7a5d5c7c19461d2e3eeb76eb0b204919e6ece342feb6032c9325ae9"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:bdac3c7d9b705d253b2ce370fde941836a5f8b3c5c2b8fd70940a3ea3af7f4f2"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03d33c2ddbc1816237a67f66336616416e2bbb6beb306e5f890f2eb22b959cdf"}, + {file = "frozenlist-1.5.0-cp311-cp311-win32.whl", hash = "sha256:237f6b23ee0f44066219dae14c70ae38a63f0440ce6750f868ee08775073f942"}, + {file = "frozenlist-1.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:0cc974cc93d32c42e7b0f6cf242a6bd941c57c61b618e78b6c0a96cb72788c1d"}, + {file = "frozenlist-1.5.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:31115ba75889723431aa9a4e77d5f398f5cf976eea3bdf61749731f62d4a4a21"}, + {file = "frozenlist-1.5.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7437601c4d89d070eac8323f121fcf25f88674627505334654fd027b091db09d"}, + {file = "frozenlist-1.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:7948140d9f8ece1745be806f2bfdf390127cf1a763b925c4a805c603df5e697e"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:feeb64bc9bcc6b45c6311c9e9b99406660a9c05ca8a5b30d14a78555088b0b3a"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:683173d371daad49cffb8309779e886e59c2f369430ad28fe715f66d08d4ab1a"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7d57d8f702221405a9d9b40f9da8ac2e4a1a8b5285aac6100f3393675f0a85ee"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30c72000fbcc35b129cb09956836c7d7abf78ab5416595e4857d1cae8d6251a6"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:000a77d6034fbad9b6bb880f7ec073027908f1b40254b5d6f26210d2dab1240e"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5d7f5a50342475962eb18b740f3beecc685a15b52c91f7d975257e13e029eca9"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:87f724d055eb4785d9be84e9ebf0f24e392ddfad00b3fe036e43f489fafc9039"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:6e9080bb2fb195a046e5177f10d9d82b8a204c0736a97a153c2466127de87784"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9b93d7aaa36c966fa42efcaf716e6b3900438632a626fb09c049f6a2f09fc631"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:52ef692a4bc60a6dd57f507429636c2af8b6046db8b31b18dac02cbc8f507f7f"}, + {file = "frozenlist-1.5.0-cp312-cp312-win32.whl", hash = "sha256:29d94c256679247b33a3dc96cce0f93cbc69c23bf75ff715919332fdbb6a32b8"}, + {file = "frozenlist-1.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:8969190d709e7c48ea386db202d708eb94bdb29207a1f269bab1196ce0dcca1f"}, + {file = "frozenlist-1.5.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7a1a048f9215c90973402e26c01d1cff8a209e1f1b53f72b95c13db61b00f953"}, + {file = "frozenlist-1.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:dd47a5181ce5fcb463b5d9e17ecfdb02b678cca31280639255ce9d0e5aa67af0"}, + {file = "frozenlist-1.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1431d60b36d15cda188ea222033eec8e0eab488f39a272461f2e6d9e1a8e63c2"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6482a5851f5d72767fbd0e507e80737f9c8646ae7fd303def99bfe813f76cf7f"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:44c49271a937625619e862baacbd037a7ef86dd1ee215afc298a417ff3270608"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:12f78f98c2f1c2429d42e6a485f433722b0061d5c0b0139efa64f396efb5886b"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce3aa154c452d2467487765e3adc730a8c153af77ad84096bc19ce19a2400840"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b7dc0c4338e6b8b091e8faf0db3168a37101943e687f373dce00959583f7439"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:45e0896250900b5aa25180f9aec243e84e92ac84bd4a74d9ad4138ef3f5c97de"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:561eb1c9579d495fddb6da8959fd2a1fca2c6d060d4113f5844b433fc02f2641"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:df6e2f325bfee1f49f81aaac97d2aa757c7646534a06f8f577ce184afe2f0a9e"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:140228863501b44b809fb39ec56b5d4071f4d0aa6d216c19cbb08b8c5a7eadb9"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7707a25d6a77f5d27ea7dc7d1fc608aa0a478193823f88511ef5e6b8a48f9d03"}, + {file = "frozenlist-1.5.0-cp313-cp313-win32.whl", hash = "sha256:31a9ac2b38ab9b5a8933b693db4939764ad3f299fcaa931a3e605bc3460e693c"}, + {file = "frozenlist-1.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:11aabdd62b8b9c4b84081a3c246506d1cddd2dd93ff0ad53ede5defec7886b28"}, + {file = "frozenlist-1.5.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:dd94994fc91a6177bfaafd7d9fd951bc8689b0a98168aa26b5f543868548d3ca"}, + {file = "frozenlist-1.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2d0da8bbec082bf6bf18345b180958775363588678f64998c2b7609e34719b10"}, + {file = "frozenlist-1.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:73f2e31ea8dd7df61a359b731716018c2be196e5bb3b74ddba107f694fbd7604"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:828afae9f17e6de596825cf4228ff28fbdf6065974e5ac1410cecc22f699d2b3"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f1577515d35ed5649d52ab4319db757bb881ce3b2b796d7283e6634d99ace307"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2150cc6305a2c2ab33299453e2968611dacb970d2283a14955923062c8d00b10"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a72b7a6e3cd2725eff67cd64c8f13335ee18fc3c7befc05aed043d24c7b9ccb9"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c16d2fa63e0800723139137d667e1056bee1a1cf7965153d2d104b62855e9b99"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:17dcc32fc7bda7ce5875435003220a457bcfa34ab7924a49a1c19f55b6ee185c"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:97160e245ea33d8609cd2b8fd997c850b56db147a304a262abc2b3be021a9171"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:f1e6540b7fa044eee0bb5111ada694cf3dc15f2b0347ca125ee9ca984d5e9e6e"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:91d6c171862df0a6c61479d9724f22efb6109111017c87567cfeb7b5d1449fdf"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c1fac3e2ace2eb1052e9f7c7db480818371134410e1f5c55d65e8f3ac6d1407e"}, + {file = "frozenlist-1.5.0-cp38-cp38-win32.whl", hash = "sha256:b97f7b575ab4a8af9b7bc1d2ef7f29d3afee2226bd03ca3875c16451ad5a7723"}, + {file = "frozenlist-1.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:374ca2dabdccad8e2a76d40b1d037f5bd16824933bf7bcea3e59c891fd4a0923"}, + {file = "frozenlist-1.5.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9bbcdfaf4af7ce002694a4e10a0159d5a8d20056a12b05b45cea944a4953f972"}, + {file = "frozenlist-1.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:1893f948bf6681733aaccf36c5232c231e3b5166d607c5fa77773611df6dc336"}, + {file = "frozenlist-1.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2b5e23253bb709ef57a8e95e6ae48daa9ac5f265637529e4ce6b003a37b2621f"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f253985bb515ecd89629db13cb58d702035ecd8cfbca7d7a7e29a0e6d39af5f"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:04a5c6babd5e8fb7d3c871dc8b321166b80e41b637c31a995ed844a6139942b6"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a9fe0f1c29ba24ba6ff6abf688cb0b7cf1efab6b6aa6adc55441773c252f7411"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:226d72559fa19babe2ccd920273e767c96a49b9d3d38badd7c91a0fdeda8ea08"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15b731db116ab3aedec558573c1a5eec78822b32292fe4f2f0345b7f697745c2"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:366d8f93e3edfe5a918c874702f78faac300209a4d5bf38352b2c1bdc07a766d"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1b96af8c582b94d381a1c1f51ffaedeb77c821c690ea5f01da3d70a487dd0a9b"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:c03eff4a41bd4e38415cbed054bbaff4a075b093e2394b6915dca34a40d1e38b"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:50cf5e7ee9b98f22bdecbabf3800ae78ddcc26e4a435515fc72d97903e8488e0"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1e76bfbc72353269c44e0bc2cfe171900fbf7f722ad74c9a7b638052afe6a00c"}, + {file = "frozenlist-1.5.0-cp39-cp39-win32.whl", hash = "sha256:666534d15ba8f0fda3f53969117383d5dc021266b3c1a42c9ec4855e4b58b9d3"}, + {file = "frozenlist-1.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:5c28f4b5dbef8a0d8aad0d4de24d1e9e981728628afaf4ea0792f5d0939372f0"}, + {file = "frozenlist-1.5.0-py3-none-any.whl", hash = "sha256:d994863bba198a4a518b467bb971c56e1db3f180a25c6cf7bb1949c267f748c3"}, + {file = "frozenlist-1.5.0.tar.gz", hash = "sha256:81d5af29e61b9c8348e876d442253723928dce6433e0e76cd925cd83f1b4b817"}, ] [[package]] @@ -2295,109 +2335,131 @@ files = [ [[package]] name = "pydantic" -version = "2.7.1" +version = "2.10.4" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic-2.7.1-py3-none-any.whl", hash = "sha256:e029badca45266732a9a79898a15ae2e8b14840b1eabbb25844be28f0b33f3d5"}, - {file = "pydantic-2.7.1.tar.gz", hash = "sha256:e9dbb5eada8abe4d9ae5f46b9939aead650cd2b68f249bb3a8139dbe125803cc"}, + {file = "pydantic-2.10.4-py3-none-any.whl", hash = "sha256:597e135ea68be3a37552fb524bc7d0d66dcf93d395acd93a00682f1efcb8ee3d"}, + {file = "pydantic-2.10.4.tar.gz", hash = "sha256:82f12e9723da6de4fe2ba888b5971157b3be7ad914267dea8f05f82b28254f06"}, ] [package.dependencies] -annotated-types = ">=0.4.0" -pydantic-core = "2.18.2" -typing-extensions = ">=4.6.1" +annotated-types = ">=0.6.0" +pydantic-core = "2.27.2" +typing-extensions = ">=4.12.2" [package.extras] email = ["email-validator (>=2.0.0)"] +timezone = ["tzdata"] [[package]] name = "pydantic-core" -version = "2.18.2" +version = "2.27.2" description = "Core functionality for Pydantic validation and serialization" 
optional = false python-versions = ">=3.8" files = [ - {file = "pydantic_core-2.18.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:9e08e867b306f525802df7cd16c44ff5ebbe747ff0ca6cf3fde7f36c05a59a81"}, - {file = "pydantic_core-2.18.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f0a21cbaa69900cbe1a2e7cad2aa74ac3cf21b10c3efb0fa0b80305274c0e8a2"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0680b1f1f11fda801397de52c36ce38ef1c1dc841a0927a94f226dea29c3ae3d"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:95b9d5e72481d3780ba3442eac863eae92ae43a5f3adb5b4d0a1de89d42bb250"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fcf5cd9c4b655ad666ca332b9a081112cd7a58a8b5a6ca7a3104bc950f2038"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b5155ff768083cb1d62f3e143b49a8a3432e6789a3abee8acd005c3c7af1c74"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:553ef617b6836fc7e4df130bb851e32fe357ce36336d897fd6646d6058d980af"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b89ed9eb7d616ef5714e5590e6cf7f23b02d0d539767d33561e3675d6f9e3857"}, - {file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:75f7e9488238e920ab6204399ded280dc4c307d034f3924cd7f90a38b1829563"}, - {file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ef26c9e94a8c04a1b2924149a9cb081836913818e55681722d7f29af88fe7b38"}, - {file = "pydantic_core-2.18.2-cp310-none-win32.whl", hash = "sha256:182245ff6b0039e82b6bb585ed55a64d7c81c560715d1bad0cbad6dfa07b4027"}, - {file = "pydantic_core-2.18.2-cp310-none-win_amd64.whl", hash = "sha256:e23ec367a948b6d812301afc1b13f8094ab7b2c280af66ef450efc357d2ae543"}, - {file = "pydantic_core-2.18.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:219da3f096d50a157f33645a1cf31c0ad1fe829a92181dd1311022f986e5fbe3"}, - {file = "pydantic_core-2.18.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cc1cfd88a64e012b74e94cd00bbe0f9c6df57049c97f02bb07d39e9c852e19a4"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05b7133a6e6aeb8df37d6f413f7705a37ab4031597f64ab56384c94d98fa0e90"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:224c421235f6102e8737032483f43c1a8cfb1d2f45740c44166219599358c2cd"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b14d82cdb934e99dda6d9d60dc84a24379820176cc4a0d123f88df319ae9c150"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2728b01246a3bba6de144f9e3115b532ee44bd6cf39795194fb75491824a1413"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:470b94480bb5ee929f5acba6995251ada5e059a5ef3e0dfc63cca287283ebfa6"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:997abc4df705d1295a42f95b4eec4950a37ad8ae46d913caeee117b6b198811c"}, - {file = "pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:75250dbc5290e3f1a0f4618db35e51a165186f9034eff158f3d490b3fed9f8a0"}, - {file = 
"pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4456f2dca97c425231d7315737d45239b2b51a50dc2b6f0c2bb181fce6207664"}, - {file = "pydantic_core-2.18.2-cp311-none-win32.whl", hash = "sha256:269322dcc3d8bdb69f054681edff86276b2ff972447863cf34c8b860f5188e2e"}, - {file = "pydantic_core-2.18.2-cp311-none-win_amd64.whl", hash = "sha256:800d60565aec896f25bc3cfa56d2277d52d5182af08162f7954f938c06dc4ee3"}, - {file = "pydantic_core-2.18.2-cp311-none-win_arm64.whl", hash = "sha256:1404c69d6a676245199767ba4f633cce5f4ad4181f9d0ccb0577e1f66cf4c46d"}, - {file = "pydantic_core-2.18.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:fb2bd7be70c0fe4dfd32c951bc813d9fe6ebcbfdd15a07527796c8204bd36242"}, - {file = "pydantic_core-2.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6132dd3bd52838acddca05a72aafb6eab6536aa145e923bb50f45e78b7251043"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d904828195733c183d20a54230c0df0eb46ec746ea1a666730787353e87182"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c9bd70772c720142be1020eac55f8143a34ec9f82d75a8e7a07852023e46617f"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b8ed04b3582771764538f7ee7001b02e1170223cf9b75dff0bc698fadb00cf3"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e6dac87ddb34aaec85f873d737e9d06a3555a1cc1a8e0c44b7f8d5daeb89d86f"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ca4ae5a27ad7a4ee5170aebce1574b375de390bc01284f87b18d43a3984df72"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:886eec03591b7cf058467a70a87733b35f44707bd86cf64a615584fd72488b7c"}, - {file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ca7b0c1f1c983e064caa85f3792dd2fe3526b3505378874afa84baf662e12241"}, - {file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4b4356d3538c3649337df4074e81b85f0616b79731fe22dd11b99499b2ebbdf3"}, - {file = "pydantic_core-2.18.2-cp312-none-win32.whl", hash = "sha256:8b172601454f2d7701121bbec3425dd71efcb787a027edf49724c9cefc14c038"}, - {file = "pydantic_core-2.18.2-cp312-none-win_amd64.whl", hash = "sha256:b1bd7e47b1558ea872bd16c8502c414f9e90dcf12f1395129d7bb42a09a95438"}, - {file = "pydantic_core-2.18.2-cp312-none-win_arm64.whl", hash = "sha256:98758d627ff397e752bc339272c14c98199c613f922d4a384ddc07526c86a2ec"}, - {file = "pydantic_core-2.18.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:9fdad8e35f278b2c3eb77cbdc5c0a49dada440657bf738d6905ce106dc1de439"}, - {file = "pydantic_core-2.18.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1d90c3265ae107f91a4f279f4d6f6f1d4907ac76c6868b27dc7fb33688cfb347"}, - {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:390193c770399861d8df9670fb0d1874f330c79caaca4642332df7c682bf6b91"}, - {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:82d5d4d78e4448683cb467897fe24e2b74bb7b973a541ea1dcfec1d3cbce39fb"}, - {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4774f3184d2ef3e14e8693194f661dea5a4d6ca4e3dc8e39786d33a94865cefd"}, - {file = 
"pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d4d938ec0adf5167cb335acb25a4ee69a8107e4984f8fbd2e897021d9e4ca21b"}, - {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0e8b1be28239fc64a88a8189d1df7fad8be8c1ae47fcc33e43d4be15f99cc70"}, - {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:868649da93e5a3d5eacc2b5b3b9235c98ccdbfd443832f31e075f54419e1b96b"}, - {file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:78363590ef93d5d226ba21a90a03ea89a20738ee5b7da83d771d283fd8a56761"}, - {file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:852e966fbd035a6468fc0a3496589b45e2208ec7ca95c26470a54daed82a0788"}, - {file = "pydantic_core-2.18.2-cp38-none-win32.whl", hash = "sha256:6a46e22a707e7ad4484ac9ee9f290f9d501df45954184e23fc29408dfad61350"}, - {file = "pydantic_core-2.18.2-cp38-none-win_amd64.whl", hash = "sha256:d91cb5ea8b11607cc757675051f61b3d93f15eca3cefb3e6c704a5d6e8440f4e"}, - {file = "pydantic_core-2.18.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:ae0a8a797a5e56c053610fa7be147993fe50960fa43609ff2a9552b0e07013e8"}, - {file = "pydantic_core-2.18.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:042473b6280246b1dbf530559246f6842b56119c2926d1e52b631bdc46075f2a"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a388a77e629b9ec814c1b1e6b3b595fe521d2cdc625fcca26fbc2d44c816804"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25add29b8f3b233ae90ccef2d902d0ae0432eb0d45370fe315d1a5cf231004b"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f459a5ce8434614dfd39bbebf1041952ae01da6bed9855008cb33b875cb024c0"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eff2de745698eb46eeb51193a9f41d67d834d50e424aef27df2fcdee1b153845"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8309f67285bdfe65c372ea3722b7a5642680f3dba538566340a9d36e920b5f0"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f93a8a2e3938ff656a7c1bc57193b1319960ac015b6e87d76c76bf14fe0244b4"}, - {file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:22057013c8c1e272eb8d0eebc796701167d8377441ec894a8fed1af64a0bf399"}, - {file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cfeecd1ac6cc1fb2692c3d5110781c965aabd4ec5d32799773ca7b1456ac636b"}, - {file = "pydantic_core-2.18.2-cp39-none-win32.whl", hash = "sha256:0d69b4c2f6bb3e130dba60d34c0845ba31b69babdd3f78f7c0c8fae5021a253e"}, - {file = "pydantic_core-2.18.2-cp39-none-win_amd64.whl", hash = "sha256:d9319e499827271b09b4e411905b24a426b8fb69464dfa1696258f53a3334641"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a1874c6dd4113308bd0eb568418e6114b252afe44319ead2b4081e9b9521fe75"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:ccdd111c03bfd3666bd2472b674c6899550e09e9f298954cfc896ab92b5b0e6d"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e18609ceaa6eed63753037fc06ebb16041d17d28199ae5aba0052c51449650a9"}, - {file = 
"pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e5c584d357c4e2baf0ff7baf44f4994be121e16a2c88918a5817331fc7599d7"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43f0f463cf89ace478de71a318b1b4f05ebc456a9b9300d027b4b57c1a2064fb"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e1b395e58b10b73b07b7cf740d728dd4ff9365ac46c18751bf8b3d8cca8f625a"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:0098300eebb1c837271d3d1a2cd2911e7c11b396eac9661655ee524a7f10587b"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:36789b70d613fbac0a25bb07ab3d9dba4d2e38af609c020cf4d888d165ee0bf3"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3f9a801e7c8f1ef8718da265bba008fa121243dfe37c1cea17840b0944dfd72c"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:3a6515ebc6e69d85502b4951d89131ca4e036078ea35533bb76327f8424531ce"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20aca1e2298c56ececfd8ed159ae4dde2df0781988c97ef77d5c16ff4bd5b400"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:223ee893d77a310a0391dca6df00f70bbc2f36a71a895cecd9a0e762dc37b349"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2334ce8c673ee93a1d6a65bd90327588387ba073c17e61bf19b4fd97d688d63c"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:cbca948f2d14b09d20268cda7b0367723d79063f26c4ffc523af9042cad95592"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b3ef08e20ec49e02d5c6717a91bb5af9b20f1805583cb0adfe9ba2c6b505b5ae"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c6fdc8627910eed0c01aed6a390a252fe3ea6d472ee70fdde56273f198938374"}, - {file = "pydantic_core-2.18.2.tar.gz", hash = "sha256:2e29d20810dfc3043ee13ac7d9e25105799817683348823f305ab3f349b9386e"}, + {file = "pydantic_core-2.27.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2d367ca20b2f14095a8f4fa1210f5a7b78b8a20009ecced6b12818f455b1e9fa"}, + {file = "pydantic_core-2.27.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:491a2b73db93fab69731eaee494f320faa4e093dbed776be1a829c2eb222c34c"}, + {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7969e133a6f183be60e9f6f56bfae753585680f3b7307a8e555a948d443cc05a"}, + {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3de9961f2a346257caf0aa508a4da705467f53778e9ef6fe744c038119737ef5"}, + {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e2bb4d3e5873c37bb3dd58714d4cd0b0e6238cebc4177ac8fe878f8b3aa8e74c"}, + {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:280d219beebb0752699480fe8f1dc61ab6615c2046d76b7ab7ee38858de0a4e7"}, + {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47956ae78b6422cbd46f772f1746799cbb862de838fd8d1fbd34a82e05b0983a"}, + {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:14d4a5c49d2f009d62a2a7140d3064f686d17a5d1a268bc641954ba181880236"}, + {file = "pydantic_core-2.27.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:337b443af21d488716f8d0b6164de833e788aa6bd7e3a39c005febc1284f4962"}, + {file = "pydantic_core-2.27.2-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:03d0f86ea3184a12f41a2d23f7ccb79cdb5a18e06993f8a45baa8dfec746f0e9"}, + {file = "pydantic_core-2.27.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7041c36f5680c6e0f08d922aed302e98b3745d97fe1589db0a3eebf6624523af"}, + {file = "pydantic_core-2.27.2-cp310-cp310-win32.whl", hash = "sha256:50a68f3e3819077be2c98110c1f9dcb3817e93f267ba80a2c05bb4f8799e2ff4"}, + {file = "pydantic_core-2.27.2-cp310-cp310-win_amd64.whl", hash = "sha256:e0fd26b16394ead34a424eecf8a31a1f5137094cabe84a1bcb10fa6ba39d3d31"}, + {file = "pydantic_core-2.27.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:8e10c99ef58cfdf2a66fc15d66b16c4a04f62bca39db589ae8cba08bc55331bc"}, + {file = "pydantic_core-2.27.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:26f32e0adf166a84d0cb63be85c562ca8a6fa8de28e5f0d92250c6b7e9e2aff7"}, + {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c19d1ea0673cd13cc2f872f6c9ab42acc4e4f492a7ca9d3795ce2b112dd7e15"}, + {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5e68c4446fe0810e959cdff46ab0a41ce2f2c86d227d96dc3847af0ba7def306"}, + {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d9640b0059ff4f14d1f37321b94061c6db164fbe49b334b31643e0528d100d99"}, + {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:40d02e7d45c9f8af700f3452f329ead92da4c5f4317ca9b896de7ce7199ea459"}, + {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c1fd185014191700554795c99b347d64f2bb637966c4cfc16998a0ca700d048"}, + {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d81d2068e1c1228a565af076598f9e7451712700b673de8f502f0334f281387d"}, + {file = "pydantic_core-2.27.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1a4207639fb02ec2dbb76227d7c751a20b1a6b4bc52850568e52260cae64ca3b"}, + {file = "pydantic_core-2.27.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:3de3ce3c9ddc8bbd88f6e0e304dea0e66d843ec9de1b0042b0911c1663ffd474"}, + {file = "pydantic_core-2.27.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:30c5f68ded0c36466acede341551106821043e9afaad516adfb6e8fa80a4e6a6"}, + {file = "pydantic_core-2.27.2-cp311-cp311-win32.whl", hash = "sha256:c70c26d2c99f78b125a3459f8afe1aed4d9687c24fd677c6a4436bc042e50d6c"}, + {file = "pydantic_core-2.27.2-cp311-cp311-win_amd64.whl", hash = "sha256:08e125dbdc505fa69ca7d9c499639ab6407cfa909214d500897d02afb816e7cc"}, + {file = "pydantic_core-2.27.2-cp311-cp311-win_arm64.whl", hash = "sha256:26f0d68d4b235a2bae0c3fc585c585b4ecc51382db0e3ba402a22cbc440915e4"}, + {file = "pydantic_core-2.27.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:9e0c8cfefa0ef83b4da9588448b6d8d2a2bf1a53c3f1ae5fca39eb3061e2f0b0"}, + {file = "pydantic_core-2.27.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:83097677b8e3bd7eaa6775720ec8e0405f1575015a463285a92bfdfe254529ef"}, + {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:172fce187655fece0c90d90a678424b013f8fbb0ca8b036ac266749c09438cb7"}, + {file = 
"pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:519f29f5213271eeeeb3093f662ba2fd512b91c5f188f3bb7b27bc5973816934"}, + {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05e3a55d124407fffba0dd6b0c0cd056d10e983ceb4e5dbd10dda135c31071d6"}, + {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c3ed807c7b91de05e63930188f19e921d1fe90de6b4f5cd43ee7fcc3525cb8c"}, + {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fb4aadc0b9a0c063206846d603b92030eb6f03069151a625667f982887153e2"}, + {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:28ccb213807e037460326424ceb8b5245acb88f32f3d2777427476e1b32c48c4"}, + {file = "pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:de3cd1899e2c279b140adde9357c4495ed9d47131b4a4eaff9052f23398076b3"}, + {file = "pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:220f892729375e2d736b97d0e51466252ad84c51857d4d15f5e9692f9ef12be4"}, + {file = "pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a0fcd29cd6b4e74fe8ddd2c90330fd8edf2e30cb52acda47f06dd615ae72da57"}, + {file = "pydantic_core-2.27.2-cp312-cp312-win32.whl", hash = "sha256:1e2cb691ed9834cd6a8be61228471d0a503731abfb42f82458ff27be7b2186fc"}, + {file = "pydantic_core-2.27.2-cp312-cp312-win_amd64.whl", hash = "sha256:cc3f1a99a4f4f9dd1de4fe0312c114e740b5ddead65bb4102884b384c15d8bc9"}, + {file = "pydantic_core-2.27.2-cp312-cp312-win_arm64.whl", hash = "sha256:3911ac9284cd8a1792d3cb26a2da18f3ca26c6908cc434a18f730dc0db7bfa3b"}, + {file = "pydantic_core-2.27.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:7d14bd329640e63852364c306f4d23eb744e0f8193148d4044dd3dacdaacbd8b"}, + {file = "pydantic_core-2.27.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:82f91663004eb8ed30ff478d77c4d1179b3563df6cdb15c0817cd1cdaf34d154"}, + {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71b24c7d61131bb83df10cc7e687433609963a944ccf45190cfc21e0887b08c9"}, + {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fa8e459d4954f608fa26116118bb67f56b93b209c39b008277ace29937453dc9"}, + {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ce8918cbebc8da707ba805b7fd0b382816858728ae7fe19a942080c24e5b7cd1"}, + {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eda3f5c2a021bbc5d976107bb302e0131351c2ba54343f8a496dc8783d3d3a6a"}, + {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd8086fa684c4775c27f03f062cbb9eaa6e17f064307e86b21b9e0abc9c0f02e"}, + {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8d9b3388db186ba0c099a6d20f0604a44eabdeef1777ddd94786cdae158729e4"}, + {file = "pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:7a66efda2387de898c8f38c0cf7f14fca0b51a8ef0b24bfea5849f1b3c95af27"}, + {file = "pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:18a101c168e4e092ab40dbc2503bdc0f62010e95d292b27827871dc85450d7ee"}, + {file = "pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = 
"sha256:ba5dd002f88b78a4215ed2f8ddbdf85e8513382820ba15ad5ad8955ce0ca19a1"}, + {file = "pydantic_core-2.27.2-cp313-cp313-win32.whl", hash = "sha256:1ebaf1d0481914d004a573394f4be3a7616334be70261007e47c2a6fe7e50130"}, + {file = "pydantic_core-2.27.2-cp313-cp313-win_amd64.whl", hash = "sha256:953101387ecf2f5652883208769a79e48db18c6df442568a0b5ccd8c2723abee"}, + {file = "pydantic_core-2.27.2-cp313-cp313-win_arm64.whl", hash = "sha256:ac4dbfd1691affb8f48c2c13241a2e3b60ff23247cbcf981759c768b6633cf8b"}, + {file = "pydantic_core-2.27.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:d3e8d504bdd3f10835468f29008d72fc8359d95c9c415ce6e767203db6127506"}, + {file = "pydantic_core-2.27.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:521eb9b7f036c9b6187f0b47318ab0d7ca14bd87f776240b90b21c1f4f149320"}, + {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85210c4d99a0114f5a9481b44560d7d1e35e32cc5634c656bc48e590b669b145"}, + {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d716e2e30c6f140d7560ef1538953a5cd1a87264c737643d481f2779fc247fe1"}, + {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f66d89ba397d92f840f8654756196d93804278457b5fbede59598a1f9f90b228"}, + {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:669e193c1c576a58f132e3158f9dfa9662969edb1a250c54d8fa52590045f046"}, + {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdbe7629b996647b99c01b37f11170a57ae675375b14b8c13b8518b8320ced5"}, + {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d262606bf386a5ba0b0af3b97f37c83d7011439e3dc1a9298f21efb292e42f1a"}, + {file = "pydantic_core-2.27.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:cabb9bcb7e0d97f74df8646f34fc76fbf793b7f6dc2438517d7a9e50eee4f14d"}, + {file = "pydantic_core-2.27.2-cp38-cp38-musllinux_1_1_armv7l.whl", hash = "sha256:d2d63f1215638d28221f664596b1ccb3944f6e25dd18cd3b86b0a4c408d5ebb9"}, + {file = "pydantic_core-2.27.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:bca101c00bff0adb45a833f8451b9105d9df18accb8743b08107d7ada14bd7da"}, + {file = "pydantic_core-2.27.2-cp38-cp38-win32.whl", hash = "sha256:f6f8e111843bbb0dee4cb6594cdc73e79b3329b526037ec242a3e49012495b3b"}, + {file = "pydantic_core-2.27.2-cp38-cp38-win_amd64.whl", hash = "sha256:fd1aea04935a508f62e0d0ef1f5ae968774a32afc306fb8545e06f5ff5cdf3ad"}, + {file = "pydantic_core-2.27.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:c10eb4f1659290b523af58fa7cffb452a61ad6ae5613404519aee4bfbf1df993"}, + {file = "pydantic_core-2.27.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ef592d4bad47296fb11f96cd7dc898b92e795032b4894dfb4076cfccd43a9308"}, + {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c61709a844acc6bf0b7dce7daae75195a10aac96a596ea1b776996414791ede4"}, + {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:42c5f762659e47fdb7b16956c71598292f60a03aa92f8b6351504359dbdba6cf"}, + {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4c9775e339e42e79ec99c441d9730fccf07414af63eac2f0e48e08fd38a64d76"}, + {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:57762139821c31847cfb2df63c12f725788bd9f04bc2fb392790959b8f70f118"}, + {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d1e85068e818c73e048fe28cfc769040bb1f475524f4745a5dc621f75ac7630"}, + {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:097830ed52fd9e427942ff3b9bc17fab52913b2f50f2880dc4a5611446606a54"}, + {file = "pydantic_core-2.27.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:044a50963a614ecfae59bb1eaf7ea7efc4bc62f49ed594e18fa1e5d953c40e9f"}, + {file = "pydantic_core-2.27.2-cp39-cp39-musllinux_1_1_armv7l.whl", hash = "sha256:4e0b4220ba5b40d727c7f879eac379b822eee5d8fff418e9d3381ee45b3b0362"}, + {file = "pydantic_core-2.27.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5e4f4bb20d75e9325cc9696c6802657b58bc1dbbe3022f32cc2b2b632c3fbb96"}, + {file = "pydantic_core-2.27.2-cp39-cp39-win32.whl", hash = "sha256:cca63613e90d001b9f2f9a9ceb276c308bfa2a43fafb75c8031c4f66039e8c6e"}, + {file = "pydantic_core-2.27.2-cp39-cp39-win_amd64.whl", hash = "sha256:77d1bca19b0f7021b3a982e6f903dcd5b2b06076def36a652e3907f596e29f67"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:2bf14caea37e91198329b828eae1618c068dfb8ef17bb33287a7ad4b61ac314e"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:b0cb791f5b45307caae8810c2023a184c74605ec3bcbb67d13846c28ff731ff8"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:688d3fd9fcb71f41c4c015c023d12a79d1c4c0732ec9eb35d96e3388a120dcf3"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d591580c34f4d731592f0e9fe40f9cc1b430d297eecc70b962e93c5c668f15f"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:82f986faf4e644ffc189a7f1aafc86e46ef70372bb153e7001e8afccc6e54133"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:bec317a27290e2537f922639cafd54990551725fc844249e64c523301d0822fc"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:0296abcb83a797db256b773f45773da397da75a08f5fcaef41f2044adec05f50"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:0d75070718e369e452075a6017fbf187f788e17ed67a3abd47fa934d001863d9"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:7e17b560be3c98a8e3aa66ce828bdebb9e9ac6ad5466fba92eb74c4c95cb1151"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c33939a82924da9ed65dab5a65d427205a73181d8098e79b6b426bdf8ad4e656"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:00bad2484fa6bda1e216e7345a798bd37c68fb2d97558edd584942aa41b7d278"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c817e2b40aba42bac6f457498dacabc568c3b7a986fc9ba7c8d9d260b71485fb"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:251136cdad0cb722e93732cb45ca5299fb56e1344a833640bf93b2803f8d1bfd"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d2088237af596f0a524d3afc39ab3b036e8adb054ee57cbb1dcf8e09da5b29cc"}, + {file = 
"pydantic_core-2.27.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:d4041c0b966a84b4ae7a09832eb691a35aec90910cd2dbe7a208de59be77965b"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:8083d4e875ebe0b864ffef72a4304827015cff328a1be6e22cc850753bfb122b"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f141ee28a0ad2123b6611b6ceff018039df17f32ada8b534e6aa039545a3efb2"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7d0c8399fcc1848491f00e0314bd59fb34a9c008761bcb422a057670c3f65e35"}, + {file = "pydantic_core-2.27.2.tar.gz", hash = "sha256:eb026e5a4c1fee05726072337ff51d1efb6f59090b7da90d30ea58625b1ffb39"}, ] [package.dependencies] @@ -2638,6 +2700,20 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-dotenv" +version = "1.0.1" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, + {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + [[package]] name = "pytz" version = "2024.1" @@ -2992,17 +3068,18 @@ mpmath = ">=0.19" [[package]] name = "testcontainers" -version = "4.8.1" +version = "4.9.0" description = "Python library for throwaway instances of anything that can run in a Docker container" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "testcontainers-4.8.1-py3-none-any.whl", hash = "sha256:d8ae43e8fe34060fcd5c3f494e0b7652b7774beabe94568a2283d0881e94d489"}, - {file = "testcontainers-4.8.1.tar.gz", hash = "sha256:5ded4820b7227ad526857eb3caaafcabce1bbac05d22ad194849b136ffae3cb0"}, + {file = "testcontainers-4.9.0-py3-none-any.whl", hash = "sha256:c6fee929990972c40bf6b91b7072c94064ff3649b405a14fde0274c8b2479d32"}, + {file = "testcontainers-4.9.0.tar.gz", hash = "sha256:2cd6af070109ff68c1ab5389dc89c86c2dc3ab30a21ca734b2cb8f0f80ad479e"}, ] [package.dependencies] docker = "*" +python-dotenv = "*" typing-extensions = "*" urllib3 = "*" wrapt = "*" @@ -3160,13 +3237,13 @@ files = [ [[package]] name = "typing-extensions" -version = "4.6.1" -description = "Backported and Experimental Type Hints for Python 3.7+" +version = "4.12.2" +description = "Backported and Experimental Type Hints for Python 3.8+" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.6.1-py3-none-any.whl", hash = "sha256:6bac751f4789b135c43228e72de18637e9a6c29d12777023a703fd1a6858469f"}, - {file = "typing_extensions-4.6.1.tar.gz", hash = "sha256:558bc0c4145f01e6405f4a5fdbd82050bd221b119f4bf72a961a1cfd471349d6"}, + {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, + {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, ] [[package]] @@ -3309,16 +3386,6 @@ files = [ {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"}, {file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"}, {file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"}, - {file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"}, - {file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"}, - {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"}, - {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"}, - {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"}, - {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"}, - {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"}, - {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"}, - {file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"}, - {file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"}, {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"}, {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"}, {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"}, @@ -3475,54 +3542,108 @@ propcache = ">=0.2.0" [[package]] name = "zstandard" -version = "0.21.0" +version = "0.23.0" description = "Zstandard bindings for Python" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "zstandard-0.21.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:649a67643257e3b2cff1c0a73130609679a5673bf389564bc6d4b164d822a7ce"}, - {file = "zstandard-0.21.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:144a4fe4be2e747bf9c646deab212666e39048faa4372abb6a250dab0f347a29"}, - {file = "zstandard-0.21.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b72060402524ab91e075881f6b6b3f37ab715663313030d0ce983da44960a86f"}, - {file = "zstandard-0.21.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8257752b97134477fb4e413529edaa04fc0457361d304c1319573de00ba796b1"}, - {file = "zstandard-0.21.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:c053b7c4cbf71cc26808ed67ae955836232f7638444d709bfc302d3e499364fa"}, - {file = "zstandard-0.21.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2769730c13638e08b7a983b32cb67775650024632cd0476bf1ba0e6360f5ac7d"}, - {file = "zstandard-0.21.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = 
"sha256:7d3bc4de588b987f3934ca79140e226785d7b5e47e31756761e48644a45a6766"}, - {file = "zstandard-0.21.0-cp310-cp310-win32.whl", hash = "sha256:67829fdb82e7393ca68e543894cd0581a79243cc4ec74a836c305c70a5943f07"}, - {file = "zstandard-0.21.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6048a287f8d2d6e8bc67f6b42a766c61923641dd4022b7fd3f7439e17ba5a4d"}, - {file = "zstandard-0.21.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7f2afab2c727b6a3d466faee6974a7dad0d9991241c498e7317e5ccf53dbc766"}, - {file = "zstandard-0.21.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ff0852da2abe86326b20abae912d0367878dd0854b8931897d44cfeb18985472"}, - {file = "zstandard-0.21.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d12fa383e315b62630bd407477d750ec96a0f438447d0e6e496ab67b8b451d39"}, - {file = "zstandard-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1b9703fe2e6b6811886c44052647df7c37478af1b4a1a9078585806f42e5b15"}, - {file = "zstandard-0.21.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df28aa5c241f59a7ab524f8ad8bb75d9a23f7ed9d501b0fed6d40ec3064784e8"}, - {file = "zstandard-0.21.0-cp311-cp311-win32.whl", hash = "sha256:0aad6090ac164a9d237d096c8af241b8dcd015524ac6dbec1330092dba151657"}, - {file = "zstandard-0.21.0-cp311-cp311-win_amd64.whl", hash = "sha256:48b6233b5c4cacb7afb0ee6b4f91820afbb6c0e3ae0fa10abbc20000acdf4f11"}, - {file = "zstandard-0.21.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e7d560ce14fd209db6adacce8908244503a009c6c39eee0c10f138996cd66d3e"}, - {file = "zstandard-0.21.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e6e131a4df2eb6f64961cea6f979cdff22d6e0d5516feb0d09492c8fd36f3bc"}, - {file = "zstandard-0.21.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1e0c62a67ff425927898cf43da2cf6b852289ebcc2054514ea9bf121bec10a5"}, - {file = "zstandard-0.21.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:1545fb9cb93e043351d0cb2ee73fa0ab32e61298968667bb924aac166278c3fc"}, - {file = "zstandard-0.21.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fe6c821eb6870f81d73bf10e5deed80edcac1e63fbc40610e61f340723fd5f7c"}, - {file = "zstandard-0.21.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ddb086ea3b915e50f6604be93f4f64f168d3fc3cef3585bb9a375d5834392d4f"}, - {file = "zstandard-0.21.0-cp37-cp37m-win32.whl", hash = "sha256:57ac078ad7333c9db7a74804684099c4c77f98971c151cee18d17a12649bc25c"}, - {file = "zstandard-0.21.0-cp37-cp37m-win_amd64.whl", hash = "sha256:1243b01fb7926a5a0417120c57d4c28b25a0200284af0525fddba812d575f605"}, - {file = "zstandard-0.21.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ea68b1ba4f9678ac3d3e370d96442a6332d431e5050223626bdce748692226ea"}, - {file = "zstandard-0.21.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8070c1cdb4587a8aa038638acda3bd97c43c59e1e31705f2766d5576b329e97c"}, - {file = "zstandard-0.21.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4af612c96599b17e4930fe58bffd6514e6c25509d120f4eae6031b7595912f85"}, - {file = "zstandard-0.21.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cff891e37b167bc477f35562cda1248acc115dbafbea4f3af54ec70821090965"}, - {file = 
"zstandard-0.21.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:a9fec02ce2b38e8b2e86079ff0b912445495e8ab0b137f9c0505f88ad0d61296"}, - {file = "zstandard-0.21.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0bdbe350691dec3078b187b8304e6a9c4d9db3eb2d50ab5b1d748533e746d099"}, - {file = "zstandard-0.21.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b69cccd06a4a0a1d9fb3ec9a97600055cf03030ed7048d4bcb88c574f7895773"}, - {file = "zstandard-0.21.0-cp38-cp38-win32.whl", hash = "sha256:9980489f066a391c5572bc7dc471e903fb134e0b0001ea9b1d3eff85af0a6f1b"}, - {file = "zstandard-0.21.0-cp38-cp38-win_amd64.whl", hash = "sha256:0e1e94a9d9e35dc04bf90055e914077c80b1e0c15454cc5419e82529d3e70728"}, - {file = "zstandard-0.21.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d2d61675b2a73edcef5e327e38eb62bdfc89009960f0e3991eae5cc3d54718de"}, - {file = "zstandard-0.21.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:25fbfef672ad798afab12e8fd204d122fca3bc8e2dcb0a2ba73bf0a0ac0f5f07"}, - {file = "zstandard-0.21.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62957069a7c2626ae80023998757e27bd28d933b165c487ab6f83ad3337f773d"}, - {file = "zstandard-0.21.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14e10ed461e4807471075d4b7a2af51f5234c8f1e2a0c1d37d5ca49aaaad49e8"}, - {file = "zstandard-0.21.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:9cff89a036c639a6a9299bf19e16bfb9ac7def9a7634c52c257166db09d950e7"}, - {file = "zstandard-0.21.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:52b2b5e3e7670bd25835e0e0730a236f2b0df87672d99d3bf4bf87248aa659fb"}, - {file = "zstandard-0.21.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b1367da0dde8ae5040ef0413fb57b5baeac39d8931c70536d5f013b11d3fc3a5"}, - {file = "zstandard-0.21.0-cp39-cp39-win32.whl", hash = "sha256:db62cbe7a965e68ad2217a056107cc43d41764c66c895be05cf9c8b19578ce9c"}, - {file = "zstandard-0.21.0-cp39-cp39-win_amd64.whl", hash = "sha256:a8d200617d5c876221304b0e3fe43307adde291b4a897e7b0617a61611dfff6a"}, - {file = "zstandard-0.21.0.tar.gz", hash = "sha256:f08e3a10d01a247877e4cb61a82a319ea746c356a3786558bed2481e6c405546"}, + {file = "zstandard-0.23.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bf0a05b6059c0528477fba9054d09179beb63744355cab9f38059548fedd46a9"}, + {file = "zstandard-0.23.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fc9ca1c9718cb3b06634c7c8dec57d24e9438b2aa9a0f02b8bb36bf478538880"}, + {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77da4c6bfa20dd5ea25cbf12c76f181a8e8cd7ea231c673828d0386b1740b8dc"}, + {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b2170c7e0367dde86a2647ed5b6f57394ea7f53545746104c6b09fc1f4223573"}, + {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c16842b846a8d2a145223f520b7e18b57c8f476924bda92aeee3a88d11cfc391"}, + {file = "zstandard-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:157e89ceb4054029a289fb504c98c6a9fe8010f1680de0201b3eb5dc20aa6d9e"}, + {file = 
"zstandard-0.23.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:203d236f4c94cd8379d1ea61db2fce20730b4c38d7f1c34506a31b34edc87bdd"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:dc5d1a49d3f8262be192589a4b72f0d03b72dcf46c51ad5852a4fdc67be7b9e4"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:752bf8a74412b9892f4e5b58f2f890a039f57037f52c89a740757ebd807f33ea"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:80080816b4f52a9d886e67f1f96912891074903238fe54f2de8b786f86baded2"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:84433dddea68571a6d6bd4fbf8ff398236031149116a7fff6f777ff95cad3df9"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ab19a2d91963ed9e42b4e8d77cd847ae8381576585bad79dbd0a8837a9f6620a"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:59556bf80a7094d0cfb9f5e50bb2db27fefb75d5138bb16fb052b61b0e0eeeb0"}, + {file = "zstandard-0.23.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:27d3ef2252d2e62476389ca8f9b0cf2bbafb082a3b6bfe9d90cbcbb5529ecf7c"}, + {file = "zstandard-0.23.0-cp310-cp310-win32.whl", hash = "sha256:5d41d5e025f1e0bccae4928981e71b2334c60f580bdc8345f824e7c0a4c2a813"}, + {file = "zstandard-0.23.0-cp310-cp310-win_amd64.whl", hash = "sha256:519fbf169dfac1222a76ba8861ef4ac7f0530c35dd79ba5727014613f91613d4"}, + {file = "zstandard-0.23.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:34895a41273ad33347b2fc70e1bff4240556de3c46c6ea430a7ed91f9042aa4e"}, + {file = "zstandard-0.23.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:77ea385f7dd5b5676d7fd943292ffa18fbf5c72ba98f7d09fc1fb9e819b34c23"}, + {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:983b6efd649723474f29ed42e1467f90a35a74793437d0bc64a5bf482bedfa0a"}, + {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80a539906390591dd39ebb8d773771dc4db82ace6372c4d41e2d293f8e32b8db"}, + {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:445e4cb5048b04e90ce96a79b4b63140e3f4ab5f662321975679b5f6360b90e2"}, + {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd30d9c67d13d891f2360b2a120186729c111238ac63b43dbd37a5a40670b8ca"}, + {file = "zstandard-0.23.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d20fd853fbb5807c8e84c136c278827b6167ded66c72ec6f9a14b863d809211c"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ed1708dbf4d2e3a1c5c69110ba2b4eb6678262028afd6c6fbcc5a8dac9cda68e"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:be9b5b8659dff1f913039c2feee1aca499cfbc19e98fa12bc85e037c17ec6ca5"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:65308f4b4890aa12d9b6ad9f2844b7ee42c7f7a4fd3390425b242ffc57498f48"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:98da17ce9cbf3bfe4617e836d561e433f871129e3a7ac16d6ef4c680f13a839c"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:8ed7d27cb56b3e058d3cf684d7200703bcae623e1dcc06ed1e18ecda39fee003"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = 
"sha256:b69bb4f51daf461b15e7b3db033160937d3ff88303a7bc808c67bbc1eaf98c78"}, + {file = "zstandard-0.23.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:034b88913ecc1b097f528e42b539453fa82c3557e414b3de9d5632c80439a473"}, + {file = "zstandard-0.23.0-cp311-cp311-win32.whl", hash = "sha256:f2d4380bf5f62daabd7b751ea2339c1a21d1c9463f1feb7fc2bdcea2c29c3160"}, + {file = "zstandard-0.23.0-cp311-cp311-win_amd64.whl", hash = "sha256:62136da96a973bd2557f06ddd4e8e807f9e13cbb0bfb9cc06cfe6d98ea90dfe0"}, + {file = "zstandard-0.23.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b4567955a6bc1b20e9c31612e615af6b53733491aeaa19a6b3b37f3b65477094"}, + {file = "zstandard-0.23.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e172f57cd78c20f13a3415cc8dfe24bf388614324d25539146594c16d78fcc8"}, + {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0e166f698c5a3e914947388c162be2583e0c638a4703fc6a543e23a88dea3c1"}, + {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:12a289832e520c6bd4dcaad68e944b86da3bad0d339ef7989fb7e88f92e96072"}, + {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d50d31bfedd53a928fed6707b15a8dbeef011bb6366297cc435accc888b27c20"}, + {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72c68dda124a1a138340fb62fa21b9bf4848437d9ca60bd35db36f2d3345f373"}, + {file = "zstandard-0.23.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:53dd9d5e3d29f95acd5de6802e909ada8d8d8cfa37a3ac64836f3bc4bc5512db"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:6a41c120c3dbc0d81a8e8adc73312d668cd34acd7725f036992b1b72d22c1772"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:40b33d93c6eddf02d2c19f5773196068d875c41ca25730e8288e9b672897c105"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9206649ec587e6b02bd124fb7799b86cddec350f6f6c14bc82a2b70183e708ba"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:76e79bc28a65f467e0409098fa2c4376931fd3207fbeb6b956c7c476d53746dd"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:66b689c107857eceabf2cf3d3fc699c3c0fe8ccd18df2219d978c0283e4c508a"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9c236e635582742fee16603042553d276cca506e824fa2e6489db04039521e90"}, + {file = "zstandard-0.23.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a8fffdbd9d1408006baaf02f1068d7dd1f016c6bcb7538682622c556e7b68e35"}, + {file = "zstandard-0.23.0-cp312-cp312-win32.whl", hash = "sha256:dc1d33abb8a0d754ea4763bad944fd965d3d95b5baef6b121c0c9013eaf1907d"}, + {file = "zstandard-0.23.0-cp312-cp312-win_amd64.whl", hash = "sha256:64585e1dba664dc67c7cdabd56c1e5685233fbb1fc1966cfba2a340ec0dfff7b"}, + {file = "zstandard-0.23.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:576856e8594e6649aee06ddbfc738fec6a834f7c85bf7cadd1c53d4a58186ef9"}, + {file = "zstandard-0.23.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:38302b78a850ff82656beaddeb0bb989a0322a8bbb1bf1ab10c17506681d772a"}, + {file = "zstandard-0.23.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2240ddc86b74966c34554c49d00eaafa8200a18d3a5b6ffbf7da63b11d74ee2"}, + {file = 
"zstandard-0.23.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2ef230a8fd217a2015bc91b74f6b3b7d6522ba48be29ad4ea0ca3a3775bf7dd5"}, + {file = "zstandard-0.23.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:774d45b1fac1461f48698a9d4b5fa19a69d47ece02fa469825b442263f04021f"}, + {file = "zstandard-0.23.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f77fa49079891a4aab203d0b1744acc85577ed16d767b52fc089d83faf8d8ed"}, + {file = "zstandard-0.23.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ac184f87ff521f4840e6ea0b10c0ec90c6b1dcd0bad2f1e4a9a1b4fa177982ea"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c363b53e257246a954ebc7c488304b5592b9c53fbe74d03bc1c64dda153fb847"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e7792606d606c8df5277c32ccb58f29b9b8603bf83b48639b7aedf6df4fe8171"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a0817825b900fcd43ac5d05b8b3079937073d2b1ff9cf89427590718b70dd840"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:9da6bc32faac9a293ddfdcb9108d4b20416219461e4ec64dfea8383cac186690"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:fd7699e8fd9969f455ef2926221e0233f81a2542921471382e77a9e2f2b57f4b"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d477ed829077cd945b01fc3115edd132c47e6540ddcd96ca169facff28173057"}, + {file = "zstandard-0.23.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa6ce8b52c5987b3e34d5674b0ab529a4602b632ebab0a93b07bfb4dfc8f8a33"}, + {file = "zstandard-0.23.0-cp313-cp313-win32.whl", hash = "sha256:a9b07268d0c3ca5c170a385a0ab9fb7fdd9f5fd866be004c4ea39e44edce47dd"}, + {file = "zstandard-0.23.0-cp313-cp313-win_amd64.whl", hash = "sha256:f3513916e8c645d0610815c257cbfd3242adfd5c4cfa78be514e5a3ebb42a41b"}, + {file = "zstandard-0.23.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2ef3775758346d9ac6214123887d25c7061c92afe1f2b354f9388e9e4d48acfc"}, + {file = "zstandard-0.23.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4051e406288b8cdbb993798b9a45c59a4896b6ecee2f875424ec10276a895740"}, + {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e2d1a054f8f0a191004675755448d12be47fa9bebbcffa3cdf01db19f2d30a54"}, + {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f83fa6cae3fff8e98691248c9320356971b59678a17f20656a9e59cd32cee6d8"}, + {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:32ba3b5ccde2d581b1e6aa952c836a6291e8435d788f656fe5976445865ae045"}, + {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f146f50723defec2975fb7e388ae3a024eb7151542d1599527ec2aa9cacb152"}, + {file = "zstandard-0.23.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1bfe8de1da6d104f15a60d4a8a768288f66aa953bbe00d027398b93fb9680b26"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:29a2bc7c1b09b0af938b7a8343174b987ae021705acabcbae560166567f5a8db"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:61f89436cbfede4bc4e91b4397eaa3e2108ebe96d05e93d6ccc95ab5714be512"}, + {file = 
"zstandard-0.23.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:53ea7cdc96c6eb56e76bb06894bcfb5dfa93b7adcf59d61c6b92674e24e2dd5e"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:a4ae99c57668ca1e78597d8b06d5af837f377f340f4cce993b551b2d7731778d"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:379b378ae694ba78cef921581ebd420c938936a153ded602c4fea612b7eaa90d"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:50a80baba0285386f97ea36239855f6020ce452456605f262b2d33ac35c7770b"}, + {file = "zstandard-0.23.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:61062387ad820c654b6a6b5f0b94484fa19515e0c5116faf29f41a6bc91ded6e"}, + {file = "zstandard-0.23.0-cp38-cp38-win32.whl", hash = "sha256:b8c0bd73aeac689beacd4e7667d48c299f61b959475cdbb91e7d3d88d27c56b9"}, + {file = "zstandard-0.23.0-cp38-cp38-win_amd64.whl", hash = "sha256:a05e6d6218461eb1b4771d973728f0133b2a4613a6779995df557f70794fd60f"}, + {file = "zstandard-0.23.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3aa014d55c3af933c1315eb4bb06dd0459661cc0b15cd61077afa6489bec63bb"}, + {file = "zstandard-0.23.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0a7f0804bb3799414af278e9ad51be25edf67f78f916e08afdb983e74161b916"}, + {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb2b1ecfef1e67897d336de3a0e3f52478182d6a47eda86cbd42504c5cbd009a"}, + {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:837bb6764be6919963ef41235fd56a6486b132ea64afe5fafb4cb279ac44f259"}, + {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1516c8c37d3a053b01c1c15b182f3b5f5eef19ced9b930b684a73bad121addf4"}, + {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48ef6a43b1846f6025dde6ed9fee0c24e1149c1c25f7fb0a0585572b2f3adc58"}, + {file = "zstandard-0.23.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11e3bf3c924853a2d5835b24f03eeba7fc9b07d8ca499e247e06ff5676461a15"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:2fb4535137de7e244c230e24f9d1ec194f61721c86ebea04e1581d9d06ea1269"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8c24f21fa2af4bb9f2c492a86fe0c34e6d2c63812a839590edaf177b7398f700"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a8c86881813a78a6f4508ef9daf9d4995b8ac2d147dcb1a450448941398091c9"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:fe3b385d996ee0822fd46528d9f0443b880d4d05528fd26a9119a54ec3f91c69"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:82d17e94d735c99621bf8ebf9995f870a6b3e6d14543b99e201ae046dfe7de70"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:c7c517d74bea1a6afd39aa612fa025e6b8011982a0897768a2f7c8ab4ebb78a2"}, + {file = "zstandard-0.23.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1fd7e0f1cfb70eb2f95a19b472ee7ad6d9a0a992ec0ae53286870c104ca939e5"}, + {file = "zstandard-0.23.0-cp39-cp39-win32.whl", hash = "sha256:43da0f0092281bf501f9c5f6f3b4c975a8a0ea82de49ba3f7100e64d422a1274"}, + {file = "zstandard-0.23.0-cp39-cp39-win_amd64.whl", hash = "sha256:f8346bfa098532bc1fb6c7ef06783e969d87a99dd1d2a5a18a892c1d7a643c58"}, + {file = "zstandard-0.23.0.tar.gz", hash = 
"sha256:b2d8c62d08e7255f68f7a740bae85b3c9b8e5466baa9cbf7f57f1cde0ac6bc09"}, ] [package.dependencies] @@ -3534,4 +3655,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "9032c11f264f2f6d8a50230e5021c606d460aafdf370da0524784c3f0f1f31b1" +content-hash = "e6904aca09abc6c805604b21a5702a97e0056406f9ec7469b091d35ee10a6b16" diff --git a/pyproject.toml b/pyproject.toml index ba4ab0b1f7..735d12d756 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,11 +7,11 @@ package-mode = false python = "^3.11" pytest = "^7.4.4" psycopg2-binary = "^2.9.10" -typing-extensions = "^4.6.1" +typing-extensions = "^4.12.2" PyJWT = {version = "^2.1.0", extras = ["crypto"]} requests = "^2.32.3" pytest-xdist = "^3.3.1" -asyncpg = "^0.29.0" +asyncpg = "^0.30.0" aiopg = "^1.4.0" Jinja2 = "^3.1.5" types-requests = "^2.31.0.0" @@ -36,7 +36,7 @@ aiohttp = "3.10.11" pytest-rerunfailures = "^15.0" types-pytest-lazy-fixture = "^0.6.3.3" pytest-split = "^0.8.1" -zstandard = "^0.21.0" +zstandard = "^0.23.0" httpx = {extras = ["http2"], version = "^0.26.0"} pytest-repeat = "^0.9.3" websockets = "^12.0" @@ -47,8 +47,9 @@ h2 = "^4.1.0" types-jwcrypto = "^1.5.0.20240925" pyyaml = "^6.0.2" types-pyyaml = "^6.0.12.20240917" -testcontainers = "^4.8.1" -jsonnet = "^0.20.0" +testcontainers = "^4.9.0" +# Jsonnet doesn't support Python 3.13 yet +jsonnet = { version = "^0.20.0", markers = "python_version < '3.13'" } [tool.poetry.group.dev.dependencies] mypy = "==1.13.0" diff --git a/test_runner/regress/test_compute_metrics.py b/test_runner/regress/test_compute_metrics.py index 787790103f..71963355b7 100644 --- a/test_runner/regress/test_compute_metrics.py +++ b/test_runner/regress/test_compute_metrics.py @@ -3,12 +3,11 @@ from __future__ import annotations import enum import os import shutil +import sys from enum import StrEnum from pathlib import Path from typing import TYPE_CHECKING, cast -# Docs are available at https://jsonnet.org/ref/bindings.html#python_api -import _jsonnet import pytest import requests import yaml @@ -87,6 +86,10 @@ def jsonnet_evaluate_file( ext_vars: str | dict[str, str] | None = None, tla_vars: str | dict[str, str] | None = None, ) -> str: + # Jsonnet doesn't support Python 3.13 yet + # Docs are available at https://jsonnet.org/ref/bindings.html#python_api + import _jsonnet + return cast( "str", _jsonnet.evaluate_file( @@ -121,6 +124,7 @@ class SqlExporterProcess(StrEnum): AUTOSCALING = "autoscaling" +@pytest.mark.xfail(sys.version_info >= (3, 13), reason="Jsonnet doesn't support Python 3.13 yet") @pytest.mark.parametrize( "collector_name", ["neon_collector", "neon_collector_autoscaling"], @@ -352,6 +356,7 @@ else: self.__proc.wait() +@pytest.mark.xfail(sys.version_info >= (3, 13), reason="Jsonnet doesn't support Python 3.13 yet") @pytest.mark.parametrize( "exporter", [SqlExporterProcess.COMPUTE, SqlExporterProcess.AUTOSCALING], From 02f81b6469c88187ddda548c2dbe32c8c5a9a41d Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Mon, 6 Jan 2025 20:28:33 +0000 Subject: [PATCH 84/89] Fix clippy warning on macOS (#10282) ## Problem On macOS: ``` error: unused variable: `disable_lfc_resizing` --> compute_tools/src/bin/compute_ctl.rs:431:9 | 431 | disable_lfc_resizing, | ^^^^^^^^^^^^^^^^^^^^ help: try ignoring the field: `disable_lfc_resizing: _` | = note: `-D unused-variables` implied by `-D warnings` = help: to override `-D warnings` add `#[allow(unused_variables)]` ``` ## Summary of changes - Initialise `disable_lfc_resizing` only on Linux (because it's used on 
Linux only in further bloc) --- compute_tools/src/bin/compute_ctl.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index 26ae25ec20..6ede5fdceb 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -428,6 +428,7 @@ fn start_postgres( let &ComputeSpec { swap_size_bytes, disk_quota_bytes, + #[cfg(target_os = "linux")] disable_lfc_resizing, .. } = &state.pspec.as_ref().unwrap().spec; From 30863c010421affd737977dbfe21ea08f18a43cb Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Tue, 7 Jan 2025 10:07:38 +0100 Subject: [PATCH 85/89] libpagestore: timeout = max(0, difference), not min(0, difference) (#10274) Using `min(0, ...)` causes us to fail to wait in most situations, so a lack of data would be a hot wait loop, which is bad. ## Problem We noticed high CPU usage in some situations --- pgxn/neon/libpagestore.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c index 88d0a5292b..fa2a570ea8 100644 --- a/pgxn/neon/libpagestore.c +++ b/pgxn/neon/libpagestore.c @@ -680,7 +680,7 @@ call_PQgetCopyData(shardno_t shard_no, char **buffer) * but in the cases that take exceptionally long, it's useful to log the * exact timestamps. */ -#define LOG_INTERVAL_US UINT64CONST(10 * 1000000) +#define LOG_INTERVAL_MS INT64CONST(10 * 1000) INSTR_TIME_SET_CURRENT(now); start_ts = last_log_ts = now; @@ -694,7 +694,7 @@ retry: WaitEvent event; long timeout; - timeout = Min(0, LOG_INTERVAL_US - INSTR_TIME_GET_MICROSEC(since_last_log)); + timeout = Max(0, LOG_INTERVAL_MS - INSTR_TIME_GET_MILLISEC(since_last_log)); /* Sleep until there's something to do */ (void) WaitEventSetWait(shard->wes_read, timeout, &event, 1, @@ -723,7 +723,7 @@ retry: INSTR_TIME_SET_CURRENT(now); since_last_log = now; INSTR_TIME_SUBTRACT(since_last_log, last_log_ts); - if (INSTR_TIME_GET_MICROSEC(since_last_log) >= LOG_INTERVAL_US) + if (INSTR_TIME_GET_MILLISEC(since_last_log) >= LOG_INTERVAL_MS) { since_start = now; INSTR_TIME_SUBTRACT(since_start, start_ts); From ea84ec357fa4caa5a48ec65a0aab9e37d1a9fda4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= Date: Tue, 7 Jan 2025 11:36:05 +0100 Subject: [PATCH 86/89] Split promote-images into promote-images-dev and promote-images-prod (#10267) ## Problem `trigger-e2e-tests` waits half an hour before starting to run. Nearly half of that time can be saved by promoting images before tests on them are complete, so the e2e tests can run in parallel. On `main` and `release{,-proxy,-compute}`, `promote-images` updates `latest` and pushes things to prod ecr, so we want to run `promote-images` only after `test-images` is done, but on other branches, there is no harm in promoting images that aren't tested yet. ## Summary of changes To promote images into dev container registries sooner, `promote-images` is split into `promote-images-dev` and `promote-images-prod`. The former pushes to dev container registries, the latter to prod ones. The latter also waits for `test-images`, while the former doesn't. This allows to run `trigger-e2e-tests` sooner. 
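Returning briefly to the `libpagestore.c` timeout fix in the patch above (`Min` → `Max`): the one-character change matters because the time remaining until the next log line is almost always positive, so clamping it with `min(0, …)` always yields a zero timeout and `WaitEventSetWait` returns immediately instead of sleeping. A standalone sketch of that arithmetic (illustrative only, not the Neon extension code; the helper names and the 1.5 s example value are invented):

```c
/*
 * Illustrative sketch only -- not pgxn/neon source. It shows why the patch
 * above replaces Min() with Max() when computing the wait timeout: the time
 * remaining until the next log line is normally positive, so min(0, remaining)
 * is always 0 and the caller never actually waits.
 */
#include <stdio.h>

#define LOG_INTERVAL_MS 10000L /* 10 s, as in the patched code */

static long min_l(long a, long b) { return a < b ? a : b; }
static long max_l(long a, long b) { return a > b ? a : b; }

int main(void)
{
    long elapsed_ms = 1500;                         /* since the last log line */
    long remaining  = LOG_INTERVAL_MS - elapsed_ms; /* 8500 ms still to wait   */

    printf("buggy timeout: %ld ms (hot loop, never waits)\n",
           min_l(0, remaining));
    printf("fixed timeout: %ld ms (waits out the interval)\n",
           max_l(0, remaining));
    return 0;
}
```

With the fix, the wait blocks for the leftover interval instead of spinning, which is what removes the high CPU usage described in that patch's problem statement.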
--- .github/workflows/actionlint.yml | 2 +- .github/workflows/build_and_test.yml | 35 +++++++++++++++++++------ .github/workflows/trigger-e2e-tests.yml | 8 +++--- 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/.github/workflows/actionlint.yml b/.github/workflows/actionlint.yml index 85cfe7446e..0e53830040 100644 --- a/.github/workflows/actionlint.yml +++ b/.github/workflows/actionlint.yml @@ -33,7 +33,7 @@ jobs: # SC2086 - Double quote to prevent globbing and word splitting. - https://www.shellcheck.net/wiki/SC2086 SHELLCHECK_OPTS: --exclude=SC2046,SC2086 with: - fail_on_error: true + fail_level: error filter_mode: nofilter level: error diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 12b1ac98ac..5c2b397c82 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -538,7 +538,7 @@ jobs: trigger-e2e-tests: if: ${{ !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' }} - needs: [ check-permissions, promote-images, tag ] + needs: [ check-permissions, promote-images-dev, tag ] uses: ./.github/workflows/trigger-e2e-tests.yml secrets: inherit @@ -930,8 +930,8 @@ jobs: docker compose -f ./docker-compose/docker-compose.yml logs || 0 docker compose -f ./docker-compose/docker-compose.yml down - promote-images: - needs: [ check-permissions, tag, test-images, vm-compute-node-image ] + promote-images-dev: + needs: [ check-permissions, tag, vm-compute-node-image ] runs-on: ubuntu-22.04 permissions: @@ -965,6 +965,25 @@ jobs: neondatabase/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }} done + promote-images-prod: + needs: [ check-permissions, tag, test-images, vm-compute-node-image ] + runs-on: ubuntu-22.04 + if: github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' + + permissions: + id-token: write # aws-actions/configure-aws-credentials + statuses: write + contents: read + + env: + VERSIONS: v14 v15 v16 v17 + + steps: + - uses: docker/login-action@v3 + with: + username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} + password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + - name: Add latest tag to images if: github.ref_name == 'main' run: | @@ -1010,7 +1029,7 @@ jobs: push-to-acr-dev: if: github.ref_name == 'main' - needs: [ tag, promote-images ] + needs: [ tag, promote-images-dev ] uses: ./.github/workflows/_push-to-acr.yml with: client_id: ${{ vars.AZURE_DEV_CLIENT_ID }} @@ -1022,7 +1041,7 @@ jobs: push-to-acr-prod: if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' - needs: [ tag, promote-images ] + needs: [ tag, promote-images-prod ] uses: ./.github/workflows/_push-to-acr.yml with: client_id: ${{ vars.AZURE_PROD_CLIENT_ID }} @@ -1112,7 +1131,7 @@ jobs: exit 1 deploy: - needs: [ check-permissions, promote-images, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ] + needs: [ check-permissions, promote-images-prod, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ] # `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod` if: (github.ref_name == 'main' || 
github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute') && !failure() && !cancelled() permissions: @@ -1333,7 +1352,7 @@ jobs: done pin-build-tools-image: - needs: [ build-build-tools-image, promote-images, build-and-test-locally ] + needs: [ build-build-tools-image, promote-images-prod, build-and-test-locally ] if: github.ref_name == 'main' uses: ./.github/workflows/pin-build-tools-image.yml with: @@ -1356,7 +1375,7 @@ jobs: - build-and-test-locally - check-codestyle-python - check-codestyle-rust - - promote-images + - promote-images-dev - test-images - trigger-custom-extensions-build-and-wait runs-on: ubuntu-22.04 diff --git a/.github/workflows/trigger-e2e-tests.yml b/.github/workflows/trigger-e2e-tests.yml index 70c2e8549f..31696248b0 100644 --- a/.github/workflows/trigger-e2e-tests.yml +++ b/.github/workflows/trigger-e2e-tests.yml @@ -68,7 +68,7 @@ jobs: GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} TAG: ${{ needs.tag.outputs.build-tag }} steps: - - name: Wait for `promote-images` job to finish + - name: Wait for `promote-images-dev` job to finish # It's important to have a timeout here, the script in the step can run infinitely timeout-minutes: 60 run: | @@ -79,17 +79,17 @@ jobs: # For PRs we use the run id as the tag BUILD_AND_TEST_RUN_ID=${TAG} while true; do - conclusion=$(gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '.jobs[] | select(.name == "promote-images") | .conclusion') + conclusion=$(gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '.jobs[] | select(.name == "promote-images-dev") | .conclusion') case "$conclusion" in success) break ;; failure | cancelled | skipped) - echo "The 'promote-images' job didn't succeed: '${conclusion}'. Exiting..." + echo "The 'promote-images-dev' job didn't succeed: '${conclusion}'. Exiting..." exit 1 ;; *) - echo "The 'promote-images' hasn't succeed yet. Waiting..." + echo "The 'promote-images-dev' hasn't succeed yet. Waiting..." sleep 60 ;; esac From be38123e62b029dcd9f9cc0beb765ad2d3333906 Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Tue, 7 Jan 2025 11:41:52 +0100 Subject: [PATCH 87/89] Fix accounting of dropped prefetched GetPage requests (#10276) Apparently, we failed to do this bookkeeping in quite a few places... ## Problem Fixes https://github.com/neondatabase/cloud/issues/22364 ## Summary of changes Add accounting of dropped requests. Note that this includes prefetches dropped due to things like "PS connection dropped unexpectedly" or "prefetch queue is already full", but *not* (yet?) "dropped due to backend shutdown". 
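To make the bookkeeping pattern in the diff below concrete: every code path that throws away a prefetch slot without consuming its response now bumps both the per-backend `expired` counter and the shared discard counter, so dropped prefetches become visible in metrics. A simplified standalone sketch of that pattern (all types and names here are invented for illustration; the real code increments `pgBufferUsage.prefetch.expired` and `MyNeonCounters->getpage_prefetch_discards_total`):

```c
/*
 * Simplified sketch of the accounting added in the patch below.
 * The struct and globals are invented; only the counting pattern is real.
 */
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct PrefetchStats {
    uint64_t expired;   /* prefetches dropped before their result was used */
    uint64_t discards;  /* shared counter exposed via metrics */
} PrefetchStats;

static PrefetchStats stats;

/* Called whenever a prefetch slot is thrown away, e.g. because the
 * pageserver connection dropped or the prefetch queue was already full. */
static void discard_prefetch_slot(bool consumed)
{
    if (consumed)
        return;             /* the response was used; nothing to count */
    stats.expired  += 1;
    stats.discards += 1;
}

int main(void)
{
    discard_prefetch_slot(false); /* connection dropped unexpectedly */
    discard_prefetch_slot(false); /* prefetch queue already full     */
    discard_prefetch_slot(true);  /* consumed normally, not counted  */
    printf("expired=%llu discards=%llu\n",
           (unsigned long long) stats.expired,
           (unsigned long long) stats.discards);
    return 0;
}
```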
--- pgxn/neon/pagestore_smgr.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c index 385905d9ce..b733807026 100644 --- a/pgxn/neon/pagestore_smgr.c +++ b/pgxn/neon/pagestore_smgr.c @@ -716,6 +716,8 @@ prefetch_on_ps_disconnect(void) MyPState->ring_receive += 1; prefetch_set_unused(ring_index); + pgBufferUsage.prefetch.expired += 1; + MyNeonCounters->getpage_prefetch_discards_total += 1; } /* @@ -935,7 +937,8 @@ Retry: prefetch_set_unused(ring_index); entry = NULL; slot = NULL; - MyNeonCounters->getpage_prefetch_discards_total++; + pgBufferUsage.prefetch.expired += 1; + MyNeonCounters->getpage_prefetch_discards_total += 1; } } @@ -1026,10 +1029,14 @@ Retry: if (!prefetch_wait_for(cleanup_index)) goto Retry; prefetch_set_unused(cleanup_index); + pgBufferUsage.prefetch.expired += 1; + MyNeonCounters->getpage_prefetch_discards_total += 1; break; case PRFS_RECEIVED: case PRFS_TAG_REMAINS: prefetch_set_unused(cleanup_index); + pgBufferUsage.prefetch.expired += 1; + MyNeonCounters->getpage_prefetch_discards_total += 1; break; default: pg_unreachable(); From 6292d93867e69247377d5a5d105359b5659afa82 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 7 Jan 2025 10:48:11 +0000 Subject: [PATCH 88/89] Compute release 2025-01-07 From 31bd2dcdb409b6059e21d7293c7b1ff8dbd1e2b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= Date: Tue, 7 Jan 2025 14:45:18 +0100 Subject: [PATCH 89/89] Fix promote-images-prod after splitting it out (#10292) ## Problem `promote-images` was split into `promote-images-dev` and `promote-images-prod` in https://github.com/neondatabase/neon/pull/10267. `dev` credentials were loaded in `promote-images-dev` and `prod` credentials were loaded in `promote-images-prod`, but `promote-images-prod` needs `dev` credentials as well to access the `dev` images to replicate them from `dev` to `prod`. ## Summary of changes Load `dev` credentials in `promote-images-prod` as well. --- .github/workflows/build_and_test.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 5c2b397c82..01f5c3ede9 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -979,6 +979,16 @@ jobs: VERSIONS: v14 v15 v16 v17 steps: + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + role-duration-seconds: 3600 + + - name: Login to Amazon Dev ECR + uses: aws-actions/amazon-ecr-login@v2 + - uses: docker/login-action@v3 with: username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}