diff --git a/.dockerignore b/.dockerignore index c7a2f78e32..3c4a748cf7 100644 --- a/.dockerignore +++ b/.dockerignore @@ -13,6 +13,7 @@ # Directories !.cargo/ !.config/ +!compute/ !compute_tools/ !control_plane/ !libs/ diff --git a/.github/workflows/_build-and-test-locally.yml b/.github/workflows/_build-and-test-locally.yml index 67152b6991..5ea911eb95 100644 --- a/.github/workflows/_build-and-test-locally.yml +++ b/.github/workflows/_build-and-test-locally.yml @@ -257,7 +257,15 @@ jobs: ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_azure)' - name: Install postgres binaries - run: cp -a pg_install /tmp/neon/pg_install + run: | + # Use tar to copy files matching the pattern, preserving the paths in the destination + tar c \ + pg_install/v* \ + pg_install/build/*/src/test/regress/*.so \ + pg_install/build/*/src/test/regress/pg_regress \ + pg_install/build/*/src/test/isolation/isolationtester \ + pg_install/build/*/src/test/isolation/pg_isolation_regress \ + | tar x -C /tmp/neon - name: Upload Neon artifact uses: ./.github/actions/upload diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index a210c962cb..81a9fd99ae 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -120,6 +120,59 @@ jobs: - name: Run mypy to check types run: poetry run mypy . + # Check that the vendor/postgres-* submodules point to the + # corresponding REL_*_STABLE_neon branches. 
+ check-submodules: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: true + + - uses: dorny/paths-filter@v3 + id: check-if-submodules-changed + with: + filters: | + vendor: + - 'vendor/**' + + - name: Check vendor/postgres-v14 submodule reference + if: steps.check-if-submodules-changed.outputs.vendor == 'true' + uses: jtmullen/submodule-branch-check-action@v1 + with: + path: "vendor/postgres-v14" + fetch_depth: "50" + sub_fetch_depth: "50" + pass_if_unchanged: true + + - name: Check vendor/postgres-v15 submodule reference + if: steps.check-if-submodules-changed.outputs.vendor == 'true' + uses: jtmullen/submodule-branch-check-action@v1 + with: + path: "vendor/postgres-v15" + fetch_depth: "50" + sub_fetch_depth: "50" + pass_if_unchanged: true + + - name: Check vendor/postgres-v16 submodule reference + if: steps.check-if-submodules-changed.outputs.vendor == 'true' + uses: jtmullen/submodule-branch-check-action@v1 + with: + path: "vendor/postgres-v16" + fetch_depth: "50" + sub_fetch_depth: "50" + pass_if_unchanged: true + + - name: Check vendor/postgres-v17 submodule reference + if: steps.check-if-submodules-changed.outputs.vendor == 'true' + uses: jtmullen/submodule-branch-check-action@v1 + with: + path: "vendor/postgres-v17" + fetch_depth: "50" + sub_fetch_depth: "50" + pass_if_unchanged: true + check-codestyle-rust: needs: [ check-permissions, build-build-tools-image ] strategy: @@ -159,6 +212,10 @@ jobs: # This will catch compiler & clippy warnings in all feature combinations. # TODO: use cargo hack for build and test as well, but, that's quite expensive. # NB: keep clippy args in sync with ./run_clippy.sh + # + # The only difference between "clippy --debug" and "clippy --release" is that in --release mode, + # #[cfg(debug_assertions)] blocks are not built. It's not worth building everything for a second + # time just for that, so skip "clippy --release". 
- run: | CLIPPY_COMMON_ARGS="$( source .neon_clippy_args; echo "$CLIPPY_COMMON_ARGS")" if [ "$CLIPPY_COMMON_ARGS" = "" ]; then @@ -168,8 +225,6 @@ jobs: echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV - name: Run cargo clippy (debug) run: cargo hack --feature-powerset clippy $CLIPPY_COMMON_ARGS - - name: Run cargo clippy (release) - run: cargo hack --feature-powerset clippy --release $CLIPPY_COMMON_ARGS - name: Check documentation generation run: cargo doc --workspace --no-deps --document-private-items @@ -547,7 +602,20 @@ jobs: strategy: fail-fast: false matrix: - version: [ v14, v15, v16, v17 ] + version: + # Much data was already generated on old PG versions with bullseye's + # libraries, the locales of which can cause data incompatibilities. + # However, new PG versions should check if they can be built on newer + # images, as that reduces the support burden of old and ancient + # distros. + - pg: v14 + debian: bullseye-slim + - pg: v15 + debian: bullseye-slim + - pg: v16 + debian: bullseye-slim + - pg: v17 + debian: bookworm-slim arch: [ x64, arm64 ] runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }} @@ -590,41 +658,46 @@ jobs: context: . 
build-args: | GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }} - PG_VERSION=${{ matrix.version }} + PG_VERSION=${{ matrix.version.pg }} BUILD_TAG=${{ needs.tag.outputs.build-tag }} TAG=${{ needs.build-build-tools-image.outputs.image-tag }} + DEBIAN_FLAVOR=${{ matrix.version.debian }} provenance: false push: true pull: true - file: Dockerfile.compute-node - cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version }}:cache-${{ matrix.arch }} - cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-node-{0}:cache-{1},mode=max', matrix.version, matrix.arch) || '' }} + file: compute/Dockerfile.compute-node + cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.arch }} + cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-node-{0}:cache-{1},mode=max', matrix.version.pg, matrix.arch) || '' }} tags: | - neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }} + neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }} - name: Build neon extensions test image - if: matrix.version == 'v16' + if: matrix.version.pg == 'v16' uses: docker/build-push-action@v6 with: context: . 
build-args: | GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }} - PG_VERSION=${{ matrix.version }} + PG_VERSION=${{ matrix.version.pg }} BUILD_TAG=${{ needs.tag.outputs.build-tag }} TAG=${{ needs.build-build-tools-image.outputs.image-tag }} + DEBIAN_FLAVOR=${{ matrix.version.debian }} provenance: false push: true pull: true - file: Dockerfile.compute-node + file: compute/Dockerfile.compute-node target: neon-pg-ext-test - cache-from: type=registry,ref=cache.neon.build/neon-test-extensions-${{ matrix.version }}:cache-${{ matrix.arch }} - cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon-test-extensions-{0}:cache-{1},mode=max', matrix.version, matrix.arch) || '' }} + cache-from: type=registry,ref=cache.neon.build/neon-test-extensions-${{ matrix.version.pg }}:cache-${{ matrix.arch }} + cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon-test-extensions-{0}:cache-{1},mode=max', matrix.version.pg, matrix.arch) || '' }} tags: | - neondatabase/neon-test-extensions-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}-${{ matrix.arch }} + neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.tag.outputs.build-tag}}-${{ matrix.arch }} - name: Build compute-tools image # compute-tools are Postgres independent, so build it only once - if: matrix.version == 'v17' + # We pick 16, because that builds on debian 11 with older glibc (and is + # thus compatible with newer glibc), rather than 17 on Debian 12, as + # that isn't guaranteed to be compatible with Debian 11 + if: matrix.version.pg == 'v16' uses: docker/build-push-action@v6 with: target: compute-tools-image @@ -633,10 +706,11 @@ jobs: GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }} BUILD_TAG=${{ needs.tag.outputs.build-tag }} TAG=${{ needs.build-build-tools-image.outputs.image-tag }} + DEBIAN_FLAVOR=${{ matrix.version.debian }} provenance: false push: true pull: true - file: 
Dockerfile.compute-node + file: compute/Dockerfile.compute-node tags: | neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }} @@ -724,7 +798,7 @@ jobs: - name: Build vm image run: | ./vm-builder \ - -spec=vm-image-spec.yaml \ + -spec=compute/vm-image-spec.yaml \ -src=neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }} \ -dst=neondatabase/vm-compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }} @@ -788,6 +862,9 @@ jobs: needs: [ check-permissions, tag, test-images, vm-compute-node-image ] runs-on: ubuntu-22.04 + permissions: + id-token: write # for `aws-actions/configure-aws-credentials` + env: VERSIONS: v14 v15 v16 v17 @@ -832,13 +909,19 @@ jobs: docker buildx imagetools create -t neondatabase/neon-test-extensions-v16:latest \ neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }} + - name: Configure AWS-prod credentials + if: github.ref_name == 'release'|| github.ref_name == 'release-proxy' + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + mask-aws-account-id: true + role-to-assume: ${{ secrets.PROD_GHA_OIDC_ROLE }} + - name: Login to prod ECR uses: docker/login-action@v3 if: github.ref_name == 'release'|| github.ref_name == 'release-proxy' with: registry: 093970136003.dkr.ecr.eu-central-1.amazonaws.com - username: ${{ secrets.PROD_GHA_RUNNER_LIMITED_AWS_ACCESS_KEY_ID }} - password: ${{ secrets.PROD_GHA_RUNNER_LIMITED_AWS_SECRET_ACCESS_KEY }} - name: Copy all images to prod ECR if: github.ref_name == 'release'|| github.ref_name == 'release-proxy' diff --git a/.github/workflows/cloud-regress.yml b/.github/workflows/cloud-regress.yml new file mode 100644 index 0000000000..ecafe183f8 --- /dev/null +++ b/.github/workflows/cloud-regress.yml @@ -0,0 +1,102 @@ +name: Cloud Regression Test +on: + schedule: + # * is a special character in YAML so you have to quote this string + # ┌───────────── minute (0 - 59) + # │ ┌───────────── hour (0 - 23) + 
# │ │ ┌───────────── day of the month (1 - 31) + # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC) + # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) + - cron: '45 1 * * *' # run once a day, timezone is utc + workflow_dispatch: # adds ability to run this manually + +defaults: + run: + shell: bash -euxo pipefail {0} + +concurrency: + # Allow only one workflow + group: ${{ github.workflow }} + cancel-in-progress: true + +jobs: + regress: + env: + POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install + DEFAULT_PG_VERSION: 16 + TEST_OUTPUT: /tmp/test_output + BUILD_TYPE: remote + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }} + + runs-on: us-east-2 + container: + image: neondatabase/build-tools:pinned + options: --init + + steps: + - uses: actions/checkout@v4 + with: + submodules: true + + - name: Patch the test + run: | + cd "vendor/postgres-v${DEFAULT_PG_VERSION}" + patch -p1 < "../../compute/patches/cloud_regress_pg${DEFAULT_PG_VERSION}.patch" + + - name: Generate a random password + id: pwgen + run: | + set +x + DBPASS=$(dd if=/dev/random bs=48 count=1 2>/dev/null | base64) + echo "::add-mask::${DBPASS//\//}" + echo DBPASS="${DBPASS//\//}" >> "${GITHUB_OUTPUT}" + + - name: Change tests according to the generated password + env: + DBPASS: ${{ steps.pwgen.outputs.DBPASS }} + run: | + cd vendor/postgres-v"${DEFAULT_PG_VERSION}"/src/test/regress + for fname in sql/*.sql expected/*.out; do + sed -i.bak s/NEON_PASSWORD_PLACEHOLDER/"'${DBPASS}'"/ "${fname}" + done + for ph in $(grep NEON_MD5_PLACEHOLDER expected/password.out | awk '{print $3;}' | sort | uniq); do + USER=$(echo "${ph}" | cut -c 22-) + MD5=md5$(echo -n "${DBPASS}${USER}" | md5sum | awk '{print $1;}') + sed -i.bak "s/${ph}/${MD5}/" expected/password.out + done + + - name: Download Neon artifact + uses: ./.github/actions/download + with: + name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact + path: /tmp/neon/ + prefix: latest + + - 
name: Run the regression tests + uses: ./.github/actions/run-python-test-set + with: + build_type: ${{ env.BUILD_TYPE }} + test_selection: cloud_regress + pg_version: ${{ env.DEFAULT_PG_VERSION }} + extra_params: -m remote_cluster + env: + BENCHMARK_CONNSTR: ${{ secrets.PG_REGRESS_CONNSTR }} + + - name: Create Allure report + id: create-allure-report + if: ${{ !cancelled() }} + uses: ./.github/actions/allure-report-generate + + - name: Post to a Slack channel + if: ${{ github.event.schedule && failure() }} + uses: slackapi/slack-github-action@v1 + with: + channel-id: "C033QLM5P7D" # on-call-staging-stream + slack-message: | + Periodic pg_regress on staging: ${{ job.status }} + <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> + <${{ steps.create-allure-report.outputs.report-url }}|Allure report> + env: + SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} + diff --git a/.github/workflows/trigger-e2e-tests.yml b/.github/workflows/trigger-e2e-tests.yml index b299cf9b99..cad9764532 100644 --- a/.github/workflows/trigger-e2e-tests.yml +++ b/.github/workflows/trigger-e2e-tests.yml @@ -102,12 +102,12 @@ jobs: # Default set of platforms to run e2e tests on platforms='["docker", "k8s"]' - # If the PR changes vendor/, pgxn/ or libs/vm_monitor/ directories, or Dockerfile.compute-node, add k8s-neonvm to the list of platforms. + # If the PR changes vendor/, pgxn/ or libs/vm_monitor/ directories, or compute/Dockerfile.compute-node, add k8s-neonvm to the list of platforms. # If the workflow run is not a pull request, add k8s-neonvm to the list. if [ "$GITHUB_EVENT_NAME" == "pull_request" ]; then for f in $(gh api "/repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}/files" --paginate --jq '.[].filename'); do case "$f" in - vendor/*|pgxn/*|libs/vm_monitor/*|Dockerfile.compute-node) + vendor/*|pgxn/*|libs/vm_monitor/*|compute/Dockerfile.compute-node) platforms=$(echo "${platforms}" | jq --compact-output '. 
+= ["k8s-neonvm"] | unique') ;; *) diff --git a/Cargo.lock b/Cargo.lock index 136f07956f..d0702e09d4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -255,12 +255,6 @@ dependencies = [ "syn 2.0.52", ] -[[package]] -name = "atomic" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c59bdb34bc650a32731b31bd8f0829cc15d24a708ee31559e0bb34f2bc320cba" - [[package]] name = "atomic-take" version = "1.1.0" @@ -295,8 +289,8 @@ dependencies = [ "fastrand 2.0.0", "hex", "http 0.2.9", - "hyper 0.14.26", - "ring 0.17.6", + "hyper 0.14.30", + "ring", "time", "tokio", "tracing", @@ -486,7 +480,7 @@ dependencies = [ "once_cell", "p256 0.11.1", "percent-encoding", - "ring 0.17.6", + "ring", "sha2", "subtle", "time", @@ -593,7 +587,7 @@ dependencies = [ "http 0.2.9", "http-body 0.4.5", "http-body 1.0.0", - "hyper 0.14.26", + "hyper 0.14.30", "hyper-rustls 0.24.0", "once_cell", "pin-project-lite", @@ -684,7 +678,7 @@ dependencies = [ "futures-util", "http 0.2.9", "http-body 0.4.5", - "hyper 0.14.26", + "hyper 0.14.30", "itoa", "matchit 0.7.0", "memchr", @@ -1089,9 +1083,9 @@ dependencies = [ [[package]] name = "ciborium" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" dependencies = [ "ciborium-io", "ciborium-ll", @@ -1100,18 +1094,18 @@ dependencies = [ [[package]] name = "ciborium-io" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" [[package]] name = "ciborium-ll" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" dependencies = [ "ciborium-io", - "half 1.8.2", + "half", ] [[package]] @@ -1224,7 +1218,7 @@ dependencies = [ "compute_api", "flate2", "futures", - "hyper 0.14.26", + "hyper 0.14.30", "nix 0.27.1", "notify", "num_cpus", @@ -1327,10 +1321,9 @@ dependencies = [ "clap", "comfy-table", "compute_api", - "git-version", "humantime", "humantime-serde", - "hyper 0.14.26", + "hyper 0.14.30", "nix 0.27.1", "once_cell", "pageserver_api", @@ -2304,12 +2297,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "half" -version = "1.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" - [[package]] name = "half" version = "2.4.1" @@ -2411,17 +2398,6 @@ dependencies = [ "digest", ] -[[package]] -name = "hostname" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c731c3e10504cc8ed35cfe2f1db4c9274c3d35fa486e3b31df46f068ef3e867" -dependencies = [ - "libc", - "match_cfg", - "winapi", -] - [[package]] name = "hostname" version = "0.4.0" @@ -2430,7 +2406,7 @@ checksum = "f9c7c7c8ac16c798734b8a24560c1362120597c40d5e1459f09498f8f6c8f2ba" dependencies = [ "cfg-if", "libc", - "windows 0.52.0", + "windows", ] [[package]] @@ -2539,9 +2515,9 @@ dependencies = [ [[package]] name = "hyper" -version = "0.14.26" +version = "0.14.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab302d72a6f11a3b910431ff93aae7e773078c769f0a3ef15fb9ec692ed147d4" +checksum = "a152ddd61dfaec7273fe8419ab357f33aee0d914c5f4efbf0d96fa749eea5ec9" dependencies = [ "bytes", "futures-channel", @@ -2554,7 +2530,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.4.9", + "socket2", "tokio", "tower-service", "tracing", @@ -2589,7 +2565,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "0646026eb1b3eea4cd9ba47912ea5ce9cc07713d105b1a14698f4e6433d348b7" dependencies = [ "http 0.2.9", - "hyper 0.14.26", + "hyper 0.14.30", "log", "rustls 0.21.11", "rustls-native-certs 0.6.2", @@ -2620,7 +2596,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1" dependencies = [ - "hyper 0.14.26", + "hyper 0.14.30", "pin-project-lite", "tokio", "tokio-io-timeout", @@ -2639,7 +2615,7 @@ dependencies = [ "http-body 1.0.0", "hyper 1.2.0", "pin-project-lite", - "socket2 0.5.5", + "socket2", "tokio", "tower", "tower-service", @@ -2648,16 +2624,16 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.56" +version = "0.1.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0722cd7114b7de04316e7ea5456a0bbb20e4adb46fd27a3697adb812cff0f37c" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "windows 0.48.0", + "windows-core", ] [[package]] @@ -2870,7 +2846,7 @@ dependencies = [ "base64 0.21.1", "js-sys", "pem", - "ring 0.17.6", + "ring", "serde", "serde_json", "simple_asn1", @@ -2908,11 +2884,11 @@ dependencies = [ [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" dependencies = [ - "spin 0.5.2", + "spin", ] [[package]] @@ -2974,12 +2950,6 @@ dependencies = [ "hashbrown 0.14.5", ] -[[package]] -name = "match_cfg" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4" - [[package]] name = "matchers" version = "0.1.0" @@ -3072,15 +3042,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "memoffset" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" -dependencies = [ - "autocfg", -] - [[package]] name = "memoffset" version = "0.9.0" @@ -3616,7 +3577,6 @@ dependencies = [ "anyhow", "camino", "clap", - "git-version", "humantime", "pageserver", "pageserver_api", @@ -3655,12 +3615,11 @@ dependencies = [ "enumset", "fail", "futures", - "git-version", "hex", "hex-literal", "humantime", "humantime-serde", - "hyper 0.14.26", + "hyper 0.14.30", "indoc", "itertools 0.10.5", "md5", @@ -3775,7 +3734,6 @@ dependencies = [ "clap", "criterion", "futures", - "git-version", "hex-literal", "itertools 0.10.5", "once_cell", @@ -3853,7 +3811,7 @@ dependencies = [ "ahash", "bytes", "chrono", - "half 2.4.1", + "half", "hashbrown 0.14.5", "num", "num-bigint", @@ -4140,7 +4098,7 @@ dependencies = [ "crc32c", "env_logger", "log", - "memoffset 0.8.0", + "memoffset 0.9.0", "once_cell", "postgres", "regex", @@ -4345,17 +4303,16 @@ dependencies = [ "fallible-iterator", "framed-websockets", "futures", - "git-version", "hashbrown 0.14.5", "hashlink", "hex", "hmac", - "hostname 0.3.1", + "hostname", "http 1.1.0", "http-body-util", "humantime", "humantime-serde", - "hyper 0.14.26", + "hyper 0.14.30", "hyper 1.2.0", "hyper-util", "indexmap 2.0.1", @@ -4400,7 +4357,7 @@ dependencies = [ "signature 2.2.0", "smallvec", "smol_str", - "socket2 0.5.5", + "socket2", "subtle", "thiserror", "tikv-jemalloc-ctl", @@ -4578,7 +4535,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48406db8ac1f3cbc7dcdb56ec355343817958a356ff430259bb07baf7607e1e1" dependencies = [ "pem", - "ring 0.17.6", + "ring", "time", "yasna", ] @@ -4602,7 +4559,7 @@ dependencies = [ 
"rustls-pki-types", "ryu", "sha1_smol", - "socket2 0.5.5", + "socket2", "tokio", "tokio-rustls 0.25.0", "tokio-util", @@ -4714,7 +4671,7 @@ dependencies = [ "futures-util", "http-types", "humantime-serde", - "hyper 0.14.26", + "hyper 0.14.30", "itertools 0.10.5", "metrics", "once_cell", @@ -4747,7 +4704,7 @@ dependencies = [ "h2 0.3.26", "http 0.2.9", "http-body 0.4.5", - "hyper 0.14.26", + "hyper 0.14.30", "hyper-rustls 0.24.0", "ipnet", "js-sys", @@ -4905,21 +4862,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "ring" -version = "0.16.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" -dependencies = [ - "cc", - "libc", - "once_cell", - "spin 0.5.2", - "untrusted 0.7.1", - "web-sys", - "winapi", -] - [[package]] name = "ring" version = "0.17.6" @@ -4929,8 +4871,8 @@ dependencies = [ "cc", "getrandom 0.2.11", "libc", - "spin 0.9.8", - "untrusted 0.9.0", + "spin", + "untrusted", "windows-sys 0.48.0", ] @@ -4950,7 +4892,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "496c1d3718081c45ba9c31fbfc07417900aa96f4070ff90dc29961836b7a9945" dependencies = [ "http 0.2.9", - "hyper 0.14.26", + "hyper 0.14.30", "lazy_static", "percent-encoding", "regex", @@ -5074,7 +5016,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fecbfb7b1444f477b345853b1fce097a2c6fb637b2bfb87e6bc5db0f043fae4" dependencies = [ "log", - "ring 0.17.6", + "ring", "rustls-webpki 0.101.7", "sct", ] @@ -5086,7 +5028,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432" dependencies = [ "log", - "ring 0.17.6", + "ring", "rustls-pki-types", "rustls-webpki 0.102.2", "subtle", @@ -5143,24 +5085,14 @@ version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"5ede67b28608b4c60685c7d54122d4400d90f62b40caee7700e700380a390fa8" -[[package]] -name = "rustls-webpki" -version = "0.100.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e98ff011474fa39949b7e5c0428f9b4937eda7da7848bbb947786b7be0b27dab" -dependencies = [ - "ring 0.16.20", - "untrusted 0.7.1", -] - [[package]] name = "rustls-webpki" version = "0.101.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" dependencies = [ - "ring 0.17.6", - "untrusted 0.9.0", + "ring", + "untrusted", ] [[package]] @@ -5169,9 +5101,9 @@ version = "0.102.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "faaa0a62740bedb9b2ef5afa303da42764c012f743917351dc9a237ea1663610" dependencies = [ - "ring 0.17.6", + "ring", "rustls-pki-types", - "untrusted 0.9.0", + "untrusted", ] [[package]] @@ -5202,10 +5134,9 @@ dependencies = [ "desim", "fail", "futures", - "git-version", "hex", "humantime", - "hyper 0.14.26", + "hyper 0.14.30", "metrics", "once_cell", "parking_lot 0.12.1", @@ -5262,11 +5193,11 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.21" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "713cfb06c7059f3588fb8044c0fad1d09e3c01d225e25b9220dbfdcf16dbb1b3" +checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" dependencies = [ - "windows-sys 0.42.0", + "windows-sys 0.52.0", ] [[package]] @@ -5290,8 +5221,8 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" dependencies = [ - "ring 0.17.6", - "untrusted 0.9.0", + "ring", + "untrusted", ] [[package]] @@ -5400,7 +5331,7 @@ version = "0.32.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eba8870c5dba2bfd9db25c75574a11429f6b95957b0a78ac02e2970dd7a5249a" dependencies 
= [ - "hostname 0.4.0", + "hostname", "libc", "os_info", "rustc_version", @@ -5712,16 +5643,6 @@ dependencies = [ "serde", ] -[[package]] -name = "socket2" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "socket2" version = "0.5.5" @@ -5732,12 +5653,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "spin" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" - [[package]] name = "spin" version = "0.9.8" @@ -5781,9 +5696,8 @@ dependencies = [ "futures", "futures-core", "futures-util", - "git-version", "humantime", - "hyper 0.14.26", + "hyper 0.14.30", "metrics", "once_cell", "parking_lot 0.12.1", @@ -5809,10 +5723,9 @@ dependencies = [ "diesel_migrations", "fail", "futures", - "git-version", "hex", "humantime", - "hyper 0.14.26", + "hyper 0.14.30", "itertools 0.10.5", "lasso", "measured", @@ -5862,7 +5775,6 @@ dependencies = [ "either", "futures", "futures-util", - "git-version", "hex", "humantime", "itertools 0.10.5", @@ -6228,7 +6140,7 @@ dependencies = [ "num_cpus", "pin-project-lite", "signal-hook-registry", - "socket2 0.5.5", + "socket2", "tokio-macros", "windows-sys 0.48.0", ] @@ -6288,7 +6200,7 @@ dependencies = [ "pin-project-lite", "postgres-protocol", "postgres-types", - "socket2 0.5.5", + "socket2", "tokio", "tokio-util", ] @@ -6300,7 +6212,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ea13f22eda7127c827983bdaf0d7fff9df21c8817bab02815ac277a21143677" dependencies = [ "futures", - "ring 0.17.6", + "ring", "rustls 0.22.4", "tokio", "tokio-postgres", @@ -6434,7 +6346,7 @@ dependencies = [ "h2 0.3.26", "http 0.2.9", "http-body 0.4.5", - "hyper 0.14.26", + "hyper 0.14.30", "hyper-timeout", "percent-encoding", "pin-project", 
@@ -6611,7 +6523,7 @@ dependencies = [ name = "tracing-utils" version = "0.1.0" dependencies = [ - "hyper 0.14.26", + "hyper 0.14.30", "opentelemetry", "opentelemetry-otlp", "opentelemetry-semantic-conventions", @@ -6714,12 +6626,6 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" -[[package]] -name = "untrusted" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" - [[package]] name = "untrusted" version = "0.9.0" @@ -6728,17 +6634,18 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "ureq" -version = "2.7.1" +version = "2.9.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b11c96ac7ee530603dcdf68ed1557050f374ce55a5a07193ebf8cbc9f8927e9" +checksum = "d11a831e3c0b56e438a28308e7c810799e3c118417f342d30ecec080105395cd" dependencies = [ - "base64 0.21.1", + "base64 0.22.1", "log", "once_cell", - "rustls 0.21.11", - "rustls-webpki 0.100.2", + "rustls 0.22.4", + "rustls-pki-types", + "rustls-webpki 0.102.2", "url", - "webpki-roots 0.23.1", + "webpki-roots 0.26.1", ] [[package]] @@ -6799,10 +6706,11 @@ dependencies = [ "criterion", "fail", "futures", + "git-version", "hex", "hex-literal", "humantime", - "hyper 0.14.26", + "hyper 0.14.30", "jsonwebtoken", "metrics", "nix 0.27.1", @@ -6837,11 +6745,10 @@ dependencies = [ [[package]] name = "uuid" -version = "1.6.1" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" +checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" dependencies = [ - "atomic", "getrandom 0.2.11", "serde", ] @@ -7075,15 +6982,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "webpki-roots" -version = 
"0.23.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b03058f88386e5ff5310d9111d53f48b17d732b401aeb83a8d5190f2ac459338" -dependencies = [ - "rustls-webpki 0.100.2", -] - [[package]] name = "webpki-roots" version = "0.25.2" @@ -7152,15 +7050,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -[[package]] -name = "windows" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" -dependencies = [ - "windows-targets 0.48.0", -] - [[package]] name = "windows" version = "0.52.0" @@ -7180,21 +7069,6 @@ dependencies = [ "windows-targets 0.52.4", ] -[[package]] -name = "windows-sys" -version = "0.42.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" -dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - [[package]] name = "windows-sys" version = "0.48.0" @@ -7243,12 +7117,6 @@ dependencies = [ "windows_x86_64_msvc 0.52.4", ] -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" - [[package]] name = "windows_aarch64_gnullvm" version = "0.48.0" @@ -7261,12 +7129,6 @@ version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" -[[package]] -name = "windows_aarch64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" - [[package]] name = "windows_aarch64_msvc" version = "0.48.0" @@ -7279,12 +7141,6 @@ version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" -[[package]] -name = "windows_i686_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" - [[package]] name = "windows_i686_gnu" version = "0.48.0" @@ -7297,12 +7153,6 @@ version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" -[[package]] -name = "windows_i686_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" - [[package]] name = "windows_i686_msvc" version = "0.48.0" @@ -7315,12 +7165,6 @@ version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" -[[package]] -name = "windows_x86_64_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" - [[package]] name = "windows_x86_64_gnu" version = "0.48.0" @@ -7333,12 +7177,6 @@ version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" - [[package]] name = "windows_x86_64_gnullvm" version = "0.48.0" @@ -7351,12 +7189,6 @@ version = "0.52.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" -[[package]] -name = "windows_x86_64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" - [[package]] name = "windows_x86_64_msvc" version = "0.48.0" @@ -7433,10 +7265,11 @@ dependencies = [ "futures-util", "generic-array", "getrandom 0.2.11", + "half", "hashbrown 0.14.5", "hex", "hmac", - "hyper 0.14.26", + "hyper 0.14.30", "indexmap 1.9.3", "itertools 0.10.5", "itertools 0.12.1", @@ -7504,7 +7337,7 @@ dependencies = [ "der 0.7.8", "hex", "pem", - "ring 0.17.6", + "ring", "signature 2.2.0", "spki 0.7.3", "thiserror", diff --git a/Cargo.toml b/Cargo.toml index fd1d4e016c..a788dcf3cb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -76,8 +76,6 @@ clap = { version = "4.0", features = ["derive"] } comfy-table = "7.1" const_format = "0.2" crc32c = "0.6" -crossbeam-deque = "0.8.5" -crossbeam-utils = "0.8.5" dashmap = { version = "5.5.0", features = ["raw-api"] } either = "1.8" enum-map = "2.4.2" @@ -95,7 +93,7 @@ hdrhistogram = "7.5.2" hex = "0.4" hex-literal = "0.4" hmac = "0.12.1" -hostname = "0.3.1" +hostname = "0.4" http = {version = "1.1.0", features = ["std"]} http-types = { version = "2", default-features = false } humantime = "2.1" @@ -104,7 +102,6 @@ hyper = "0.14" tokio-tungstenite = "0.20.0" indexmap = "2" indoc = "2" -inotify = "0.10.2" ipnet = "2.9.0" itertools = "0.10" jsonwebtoken = "9" @@ -113,7 +110,7 @@ libc = "0.2" md5 = "0.7.0" measured = { version = "0.0.22", features=["lasso"] } measured-process = { version = "0.0.22" } -memoffset = "0.8" +memoffset = "0.9" nix = { version = "0.27", features = ["dir", "fs", "process", "socket", "signal", "poll"] } notify = "6.0.0" num_cpus = "1.15" @@ -142,7 +139,6 @@ rpds = "0.13" rustc-hash = "1.1.0" rustls = "0.22" rustls-pemfile = "2" -rustls-split = "0.3" scopeguard 
= "1.1" sysinfo = "0.29.2" sd-notify = "0.4.1" @@ -164,7 +160,6 @@ strum_macros = "0.26" svg_fmt = "0.4.3" sync_wrapper = "0.1.2" tar = "0.4" -task-local-extensions = "0.1.4" test-context = "0.3" thiserror = "1.0" tikv-jemallocator = "0.5" diff --git a/Dockerfile.compute-node b/compute/Dockerfile.compute-node similarity index 94% rename from Dockerfile.compute-node rename to compute/Dockerfile.compute-node index 6bf6fb650f..2c647a669c 100644 --- a/Dockerfile.compute-node +++ b/compute/Dockerfile.compute-node @@ -3,13 +3,15 @@ ARG REPOSITORY=neondatabase ARG IMAGE=build-tools ARG TAG=pinned ARG BUILD_TAG +ARG DEBIAN_FLAVOR=bullseye-slim ######################################################################################### # # Layer "build-deps" # ######################################################################################### -FROM debian:bullseye-slim AS build-deps +FROM debian:$DEBIAN_FLAVOR AS build-deps +ARG DEBIAN_FLAVOR RUN apt update && \ apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \ zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libssl-dev \ @@ -280,7 +282,7 @@ FROM build-deps AS vector-pg-build ARG PG_VERSION COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY patches/pgvector.patch /pgvector.patch +COPY compute/patches/pgvector.patch /pgvector.patch # By default, pgvector Makefile uses `-march=native`. We don't want that, # because we build the images on different machines than where we run them. @@ -366,7 +368,7 @@ FROM build-deps AS rum-pg-build ARG PG_VERSION COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ -COPY patches/rum.patch /rum.patch +COPY compute/patches/rum.patch /rum.patch RUN case "${PG_VERSION}" in "v17") \ echo "v17 extensions are not supported yet. 
Quit" && exit 0;; \ @@ -1027,10 +1029,47 @@ RUN cd compute_tools && mold -run cargo build --locked --profile release-line-de # ######################################################################################### -FROM debian:bullseye-slim AS compute-tools-image +FROM debian:$DEBIAN_FLAVOR AS compute-tools-image +ARG DEBIAN_FLAVOR COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl +######################################################################################### +# +# Layer "pgbouncer" +# +######################################################################################### + +FROM debian:$DEBIAN_FLAVOR AS pgbouncer +ARG DEBIAN_FLAVOR +RUN set -e \ + && apt-get update \ + && apt-get install -y \ + build-essential \ + git \ + libevent-dev \ + libtool \ + pkg-config + +# Use `dist_man_MANS=` to skip manpage generation (which requires python3/pandoc) +ENV PGBOUNCER_TAG=pgbouncer_1_22_1 +RUN set -e \ + && git clone --recurse-submodules --depth 1 --branch ${PGBOUNCER_TAG} https://github.com/pgbouncer/pgbouncer.git pgbouncer \ + && cd pgbouncer \ + && ./autogen.sh \ + && LDFLAGS=-static ./configure --prefix=/usr/local/pgbouncer --without-openssl \ + && make -j $(nproc) dist_man_MANS= \ + && make install dist_man_MANS= + +######################################################################################### +# +# Layers "postgres-exporter" and "sql-exporter" +# +######################################################################################### + +FROM quay.io/prometheuscommunity/postgres-exporter:v0.12.1 AS postgres-exporter +FROM burningalchemist/sql_exporter:0.13 AS sql-exporter + ######################################################################################### # # Clean up postgres folder before inclusion @@ -1078,7 +1117,7 @@ COPY --from=pgjwt-pg-build /pgjwt.tar.gz /ext-src COPY --from=hypopg-pg-build /hypopg.tar.gz /ext-src COPY --from=pg-hashids-pg-build 
/pg_hashids.tar.gz /ext-src COPY --from=rum-pg-build /rum.tar.gz /ext-src -COPY patches/rum.patch /ext-src +COPY compute/patches/rum.patch /ext-src #COPY --from=pgtap-pg-build /pgtap.tar.gz /ext-src COPY --from=ip4r-pg-build /ip4r.tar.gz /ext-src COPY --from=prefix-pg-build /prefix.tar.gz /ext-src @@ -1086,9 +1125,9 @@ COPY --from=hll-pg-build /hll.tar.gz /ext-src COPY --from=plpgsql-check-pg-build /plpgsql_check.tar.gz /ext-src #COPY --from=timescaledb-pg-build /timescaledb.tar.gz /ext-src COPY --from=pg-hint-plan-pg-build /pg_hint_plan.tar.gz /ext-src -COPY patches/pg_hint_plan.patch /ext-src +COPY compute/patches/pg_hint_plan.patch /ext-src COPY --from=pg-cron-pg-build /pg_cron.tar.gz /ext-src -COPY patches/pg_cron.patch /ext-src +COPY compute/patches/pg_cron.patch /ext-src #COPY --from=pg-pgx-ulid-build /home/nonroot/pgx_ulid.tar.gz /ext-src #COPY --from=rdkit-pg-build /rdkit.tar.gz /ext-src COPY --from=pg-uuidv7-pg-build /pg_uuidv7.tar.gz /ext-src @@ -1097,7 +1136,7 @@ COPY --from=pg-semver-pg-build /pg_semver.tar.gz /ext-src #COPY --from=pg-embedding-pg-build /home/nonroot/pg_embedding-src/ /ext-src #COPY --from=wal2json-pg-build /wal2json_2_5.tar.gz /ext-src COPY --from=pg-anon-pg-build /pg_anon.tar.gz /ext-src -COPY patches/pg_anon.patch /ext-src +COPY compute/patches/pg_anon.patch /ext-src COPY --from=pg-ivm-build /pg_ivm.tar.gz /ext-src COPY --from=pg-partman-build /pg_partman.tar.gz /ext-src RUN case "${PG_VERSION}" in "v17") \ @@ -1144,7 +1183,9 @@ ENV PGDATABASE=postgres # Put it all together into the final image # ######################################################################################### -FROM debian:bullseye-slim +FROM debian:$DEBIAN_FLAVOR +ARG DEBIAN_FLAVOR +ENV DEBIAN_FLAVOR=$DEBIAN_FLAVOR # Add user postgres RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \ echo "postgres:test_console_pass" | chpasswd && \ @@ -1160,23 +1201,50 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \ COPY 
--from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl +# pgbouncer and its config +COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer +COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini + +# Metrics exporter binaries and configuration files +COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter +COPY --from=sql-exporter /bin/sql_exporter /bin/sql_exporter + +COPY --chmod=0644 compute/etc/sql_exporter.yml /etc/sql_exporter.yml +COPY --chmod=0644 compute/etc/neon_collector.yml /etc/neon_collector.yml +COPY --chmod=0644 compute/etc/sql_exporter_autoscaling.yml /etc/sql_exporter_autoscaling.yml +COPY --chmod=0644 compute/etc/neon_collector_autoscaling.yml /etc/neon_collector_autoscaling.yml + # Create remote extension download directory RUN mkdir /usr/local/download_extensions && chown -R postgres:postgres /usr/local/download_extensions # Install: # libreadline8 for psql -# libicu67, locales for collations (including ICU and plpgsql_check) # liblz4-1 for lz4 # libossp-uuid16 for extension ossp-uuid -# libgeos, libgdal, libsfcgal1, libproj and libprotobuf-c1 for PostGIS +# libgeos, libsfcgal1, and libprotobuf-c1 for PostGIS # libxml2, libxslt1.1 for xml2 # libzstd1 for zstd # libboost* for rdkit # ca-certificates for communicating with s3 by compute_ctl -RUN apt update && \ + + +RUN apt update && \ + case $DEBIAN_FLAVOR in \ + # Version-specific installs for Bullseye (PG14-PG16): + # libicu67, locales for collations (including ICU and plpgsql_check) + # libgdal28, libproj19 for PostGIS + bullseye*) \ + VERSION_INSTALLS="libicu67 libgdal28 libproj19"; \ + ;; \ + # Version-specific installs for Bookworm (PG17): + # libicu72, locales for collations (including ICU and plpgsql_check) + # libgdal32, libproj25 for PostGIS + bookworm*) \ + 
VERSION_INSTALLS="libicu72 libgdal32 libproj25"; \ + ;; \ + esac && \ apt install --no-install-recommends -y \ gdb \ - libicu67 \ liblz4-1 \ libreadline8 \ libboost-iostreams1.74.0 \ @@ -1185,8 +1253,6 @@ RUN apt update && \ libboost-system1.74.0 \ libossp-uuid16 \ libgeos-c1v5 \ - libgdal28 \ - libproj19 \ libprotobuf-c1 \ libsfcgal1 \ libxml2 \ @@ -1195,7 +1261,8 @@ RUN apt update && \ libcurl4-openssl-dev \ locales \ procps \ - ca-certificates && \ + ca-certificates \ + $VERSION_INSTALLS && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 diff --git a/compute/README.md b/compute/README.md new file mode 100644 index 0000000000..bb1e42ab53 --- /dev/null +++ b/compute/README.md @@ -0,0 +1,21 @@ +This directory contains files that are needed to build the compute +images, or included in the compute images. + +Dockerfile.compute-node + To build the compute image + +vm-image-spec.yaml + Instructions for vm-builder, to turn the compute-node image into + corresponding vm-compute-node image. + +etc/ + Configuration files included in /etc in the compute image + +patches/ + Some extensions need to be patched to work with Neon. This + directory contains such patches. They are applied to the extension + sources in Dockerfile.compute-node + +In addition to these, postgres itself, the neon postgres extension, +and compute_ctl are built and copied into the compute image by +Dockerfile.compute-node. 
diff --git a/compute/etc/neon_collector.yml b/compute/etc/neon_collector.yml new file mode 100644 index 0000000000..acb17d3cc0 --- /dev/null +++ b/compute/etc/neon_collector.yml @@ -0,0 +1,246 @@ +collector_name: neon_collector +metrics: +- metric_name: lfc_misses + type: gauge + help: 'lfc_misses' + key_labels: + values: [lfc_misses] + query: | + select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses'; + +- metric_name: lfc_used + type: gauge + help: 'LFC chunks used (chunk = 1MB)' + key_labels: + values: [lfc_used] + query: | + select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used'; + +- metric_name: lfc_hits + type: gauge + help: 'lfc_hits' + key_labels: + values: [lfc_hits] + query: | + select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits'; + +- metric_name: lfc_writes + type: gauge + help: 'lfc_writes' + key_labels: + values: [lfc_writes] + query: | + select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes'; + +- metric_name: lfc_cache_size_limit + type: gauge + help: 'LFC cache size limit in bytes' + key_labels: + values: [lfc_cache_size_limit] + query: | + select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit; + +- metric_name: connection_counts + type: gauge + help: 'Connection counts' + key_labels: + - datname + - state + values: [count] + query: | + select datname, state, count(*) as count from pg_stat_activity where state <> '' group by datname, state; + +- metric_name: pg_stats_userdb + type: gauge + help: 'Stats for several oldest non-system dbs' + key_labels: + - datname + value_label: kind + values: + - db_size + - deadlocks + # Rows + - inserted + - updated + - deleted + # We export stats for 10 non-system databases. Without this limit + # it is too easy to abuse the system by creating lots of databases. 
+ query: | + select pg_database_size(datname) as db_size, deadlocks, + tup_inserted as inserted, tup_updated as updated, tup_deleted as deleted, + datname + from pg_stat_database + where datname IN ( + select datname + from pg_database + where datname <> 'postgres' and not datistemplate + order by oid + limit 10 + ); + +- metric_name: max_cluster_size + type: gauge + help: 'neon.max_cluster_size setting' + key_labels: + values: [max_cluster_size] + query: | + select setting::int as max_cluster_size from pg_settings where name = 'neon.max_cluster_size'; + +- metric_name: db_total_size + type: gauge + help: 'Size of all databases' + key_labels: + values: [total] + query: | + select sum(pg_database_size(datname)) as total from pg_database; + +# DEPRECATED +- metric_name: lfc_approximate_working_set_size + type: gauge + help: 'Approximate working set size in pages of 8192 bytes' + key_labels: + values: [approximate_working_set_size] + query: | + select neon.approximate_working_set_size(false) as approximate_working_set_size; + +- metric_name: lfc_approximate_working_set_size_windows + type: gauge + help: 'Approximate working set size in pages of 8192 bytes' + key_labels: [duration] + values: [size] + # NOTE: This is the "public" / "human-readable" version. Here, we supply a small selection + # of durations in a pretty-printed form. 
+ query: | + select + x as duration, + neon.approximate_working_set_size_seconds(extract('epoch' from x::interval)::int) as size + from + (values ('5m'),('15m'),('1h')) as t (x); + +- metric_name: compute_current_lsn + type: gauge + help: 'Current LSN of the database' + key_labels: + values: [lsn] + query: | + select + case + when pg_catalog.pg_is_in_recovery() + then (pg_last_wal_replay_lsn() - '0/0')::FLOAT8 + else (pg_current_wal_lsn() - '0/0')::FLOAT8 + end as lsn; + +- metric_name: compute_receive_lsn + type: gauge + help: 'Returns the last write-ahead log location that has been received and synced to disk by streaming replication' + key_labels: + values: [lsn] + query: | + SELECT + CASE + WHEN pg_catalog.pg_is_in_recovery() + THEN (pg_last_wal_receive_lsn() - '0/0')::FLOAT8 + ELSE 0 + END AS lsn; + +- metric_name: replication_delay_bytes + type: gauge + help: 'Bytes between received and replayed LSN' + key_labels: + values: [replication_delay_bytes] + # We use a GREATEST call here because this calculation can be negative. + # The calculation is not atomic, meaning after we've gotten the receive + # LSN, the replay LSN may have advanced past the receive LSN we + # are using for the calculation. 
+ query: | + SELECT GREATEST(0, pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())) AS replication_delay_bytes; + +- metric_name: replication_delay_seconds + type: gauge + help: 'Time since last LSN was replayed' + key_labels: + values: [replication_delay_seconds] + query: | + SELECT + CASE + WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN 0 + ELSE GREATEST (0, EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp())) + END AS replication_delay_seconds; + +- metric_name: checkpoints_req + type: gauge + help: 'Number of requested checkpoints' + key_labels: + values: [checkpoints_req] + query: | + SELECT checkpoints_req FROM pg_stat_bgwriter; + +- metric_name: checkpoints_timed + type: gauge + help: 'Number of scheduled checkpoints' + key_labels: + values: [checkpoints_timed] + query: | + SELECT checkpoints_timed FROM pg_stat_bgwriter; + +- metric_name: compute_logical_snapshot_files + type: gauge + help: 'Number of snapshot files in pg_logical/snapshot' + key_labels: + - timeline_id + values: [num_logical_snapshot_files] + query: | + SELECT + (SELECT setting FROM pg_settings WHERE name = 'neon.timeline_id') AS timeline_id, + -- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp. These + -- temporary snapshot files are renamed to the actual snapshot files after they are + -- completely built. We only WAL-log the completely built snapshot files. + (SELECT COUNT(*) FROM pg_ls_dir('pg_logical/snapshots') AS name WHERE name LIKE '%.snap') AS num_logical_snapshot_files; + +# In all the below metrics, we cast LSNs to floats because Prometheus only supports floats. +# It's probably fine because float64 can store integers from -2^53 to +2^53 exactly. + +# Number of slots is limited by max_replication_slots, so collecting position for all of them shouldn't be bad. 
+- metric_name: logical_slot_restart_lsn + type: gauge + help: 'restart_lsn of logical slots' + key_labels: + - slot_name + values: [restart_lsn] + query: | + select slot_name, (restart_lsn - '0/0')::FLOAT8 as restart_lsn + from pg_replication_slots + where slot_type = 'logical'; + +- metric_name: compute_subscriptions_count + type: gauge + help: 'Number of logical replication subscriptions grouped by enabled/disabled' + key_labels: + - enabled + values: [subscriptions_count] + query: | + select subenabled::text as enabled, count(*) as subscriptions_count + from pg_subscription + group by subenabled; + +- metric_name: retained_wal + type: gauge + help: 'Retained WAL in inactive replication slots' + key_labels: + - slot_name + values: [retained_wal] + query: | + SELECT slot_name, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::FLOAT8 AS retained_wal + FROM pg_replication_slots + WHERE active = false; + +- metric_name: wal_is_lost + type: gauge + help: 'Whether or not the replication slot wal_status is lost' + key_labels: + - slot_name + values: [wal_is_lost] + query: | + SELECT slot_name, + CASE WHEN wal_status = 'lost' THEN 1 ELSE 0 END AS wal_is_lost + FROM pg_replication_slots; diff --git a/compute/etc/neon_collector_autoscaling.yml b/compute/etc/neon_collector_autoscaling.yml new file mode 100644 index 0000000000..5616264eba --- /dev/null +++ b/compute/etc/neon_collector_autoscaling.yml @@ -0,0 +1,55 @@ +collector_name: neon_collector_autoscaling +metrics: +- metric_name: lfc_misses + type: gauge + help: 'lfc_misses' + key_labels: + values: [lfc_misses] + query: | + select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses'; + +- metric_name: lfc_used + type: gauge + help: 'LFC chunks used (chunk = 1MB)' + key_labels: + values: [lfc_used] + query: | + select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used'; + +- metric_name: lfc_hits + type: gauge + help: 'lfc_hits' + key_labels: + values: 
[lfc_hits] + query: | + select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits'; + +- metric_name: lfc_writes + type: gauge + help: 'lfc_writes' + key_labels: + values: [lfc_writes] + query: | + select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes'; + +- metric_name: lfc_cache_size_limit + type: gauge + help: 'LFC cache size limit in bytes' + key_labels: + values: [lfc_cache_size_limit] + query: | + select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit; + +- metric_name: lfc_approximate_working_set_size_windows + type: gauge + help: 'Approximate working set size in pages of 8192 bytes' + key_labels: [duration_seconds] + values: [size] + # NOTE: This is the "internal" / "machine-readable" version. This outputs the working set + # size looking back 1..60 minutes, labeled with the number of minutes. + query: | + select + x::text as duration_seconds, + neon.approximate_working_set_size_seconds(x) as size + from + (select generate_series * 60 as x from generate_series(1, 60)) as t (x); diff --git a/compute/etc/pgbouncer.ini b/compute/etc/pgbouncer.ini new file mode 100644 index 0000000000..cb994f961c --- /dev/null +++ b/compute/etc/pgbouncer.ini @@ -0,0 +1,17 @@ +[databases] +*=host=localhost port=5432 auth_user=cloud_admin +[pgbouncer] +listen_port=6432 +listen_addr=0.0.0.0 +auth_type=scram-sha-256 +auth_user=cloud_admin +auth_dbname=postgres +client_tls_sslmode=disable +server_tls_sslmode=disable +pool_mode=transaction +max_client_conn=10000 +default_pool_size=64 +max_prepared_statements=0 +admin_users=postgres +unix_socket_dir=/tmp/ +unix_socket_mode=0777 diff --git a/compute/etc/sql_exporter.yml b/compute/etc/sql_exporter.yml new file mode 100644 index 0000000000..139d04468a --- /dev/null +++ b/compute/etc/sql_exporter.yml @@ -0,0 +1,33 @@ +# Configuration for sql_exporter +# Global defaults. 
+global: + # If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s. + scrape_timeout: 10s + # Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first. + scrape_timeout_offset: 500ms + # Minimum interval between collector runs: by default (0s) collectors are executed on every scrape. + min_interval: 0s + # Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections, + # as will concurrent scrapes. + max_connections: 1 + # Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should + # always be the same as max_connections. + max_idle_connections: 1 + # Maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse. + # If 0, connections are not closed due to a connection's age. + max_connection_lifetime: 5m + +# The target to monitor and the collectors to execute on it. +target: + # Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL) + # the schema gets dropped or replaced to match the driver expected DSN format. + data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter' + + # Collectors (referenced by name) to execute on the target. + # Glob patterns are supported (see Go's path/filepath.Match for syntax). + collectors: [neon_collector] + +# Collector files specifies a list of globs. One collector definition is read from each matching file. +# Glob patterns are supported (see Go's path/filepath.Match for syntax). +collector_files: + - "neon_collector.yml" diff --git a/compute/etc/sql_exporter_autoscaling.yml b/compute/etc/sql_exporter_autoscaling.yml new file mode 100644 index 0000000000..044557233e --- /dev/null +++ b/compute/etc/sql_exporter_autoscaling.yml @@ -0,0 +1,33 @@ +# Configuration for sql_exporter for autoscaling-agent +# Global defaults. 
+global: + # If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s. + scrape_timeout: 10s + # Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first. + scrape_timeout_offset: 500ms + # Minimum interval between collector runs: by default (0s) collectors are executed on every scrape. + min_interval: 0s + # Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections, + # as will concurrent scrapes. + max_connections: 1 + # Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should + # always be the same as max_connections. + max_idle_connections: 1 + # Maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse. + # If 0, connections are not closed due to a connection's age. + max_connection_lifetime: 5m + +# The target to monitor and the collectors to execute on it. +target: + # Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL) + # the schema gets dropped or replaced to match the driver expected DSN format. + data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter_autoscaling' + + # Collectors (referenced by name) to execute on the target. + # Glob patterns are supported (see Go's path/filepath.Match for syntax). + collectors: [neon_collector_autoscaling] + +# Collector files specifies a list of globs. One collector definition is read from each matching file. +# Glob patterns are supported (see for syntax). 
+collector_files: + - "neon_collector_autoscaling.yml" diff --git a/compute/patches/cloud_regress_pg16.patch b/compute/patches/cloud_regress_pg16.patch new file mode 100644 index 0000000000..d15d0cffeb --- /dev/null +++ b/compute/patches/cloud_regress_pg16.patch @@ -0,0 +1,3949 @@ +diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out +index 0c24f6afe4..dd808ac2b4 100644 +--- a/src/test/regress/expected/aggregates.out ++++ b/src/test/regress/expected/aggregates.out +@@ -11,7 +11,8 @@ CREATE TABLE aggtest ( + b float4 + ); + \set filename :abs_srcdir '/data/agg.data' +-COPY aggtest FROM :'filename'; ++\set command '\\copy aggtest FROM ' :'filename'; ++:command + ANALYZE aggtest; + SELECT avg(four) AS avg_1 FROM onek; + avg_1 +diff --git a/src/test/regress/expected/alter_generic.out b/src/test/regress/expected/alter_generic.out +index ae54cb254f..888e2ee8bc 100644 +--- a/src/test/regress/expected/alter_generic.out ++++ b/src/test/regress/expected/alter_generic.out +@@ -15,9 +15,9 @@ DROP ROLE IF EXISTS regress_alter_generic_user1; + DROP ROLE IF EXISTS regress_alter_generic_user2; + DROP ROLE IF EXISTS regress_alter_generic_user3; + RESET client_min_messages; +-CREATE USER regress_alter_generic_user3; +-CREATE USER regress_alter_generic_user2; +-CREATE USER regress_alter_generic_user1 IN ROLE regress_alter_generic_user3; ++CREATE USER regress_alter_generic_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_alter_generic_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_alter_generic_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE regress_alter_generic_user3; + CREATE SCHEMA alt_nsp1; + CREATE SCHEMA alt_nsp2; + GRANT ALL ON SCHEMA alt_nsp1, alt_nsp2 TO public; +@@ -370,7 +370,7 @@ ERROR: STORAGE cannot be specified in ALTER OPERATOR FAMILY + DROP OPERATOR FAMILY alt_opf4 USING btree; + -- Should fail. Need to be SUPERUSER to do ALTER OPERATOR FAMILY .. 
ADD / DROP + BEGIN TRANSACTION; +-CREATE ROLE regress_alter_generic_user5 NOSUPERUSER; ++CREATE ROLE regress_alter_generic_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER NOSUPERUSER; + CREATE OPERATOR FAMILY alt_opf5 USING btree; + SET ROLE regress_alter_generic_user5; + ALTER OPERATOR FAMILY alt_opf5 USING btree ADD OPERATOR 1 < (int4, int2), FUNCTION 1 btint42cmp(int4, int2); +@@ -382,7 +382,7 @@ ERROR: current transaction is aborted, commands ignored until end of transactio + ROLLBACK; + -- Should fail. Need rights to namespace for ALTER OPERATOR FAMILY .. ADD / DROP + BEGIN TRANSACTION; +-CREATE ROLE regress_alter_generic_user6; ++CREATE ROLE regress_alter_generic_user6 PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE SCHEMA alt_nsp6; + REVOKE ALL ON SCHEMA alt_nsp6 FROM regress_alter_generic_user6; + CREATE OPERATOR FAMILY alt_nsp6.alt_opf6 USING btree; +diff --git a/src/test/regress/expected/alter_operator.out b/src/test/regress/expected/alter_operator.out +index 71bd484282..066ea4ec0d 100644 +--- a/src/test/regress/expected/alter_operator.out ++++ b/src/test/regress/expected/alter_operator.out +@@ -127,7 +127,7 @@ ERROR: operator attribute "Restrict" not recognized + -- + -- Test permission check. Must be owner to ALTER OPERATOR. 
+ -- +-CREATE USER regress_alter_op_user; ++CREATE USER regress_alter_op_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION AUTHORIZATION regress_alter_op_user; + ALTER OPERATOR === (boolean, boolean) SET (RESTRICT = NONE); + ERROR: must be owner of operator === +diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out +index 0e439a6488..393f316c3e 100644 +--- a/src/test/regress/expected/alter_table.out ++++ b/src/test/regress/expected/alter_table.out +@@ -5,7 +5,7 @@ + SET client_min_messages TO 'warning'; + DROP ROLE IF EXISTS regress_alter_table_user1; + RESET client_min_messages; +-CREATE USER regress_alter_table_user1; ++CREATE USER regress_alter_table_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- + -- add attribute + -- +@@ -3896,8 +3896,8 @@ DROP TABLE fail_part; + ALTER TABLE list_parted ATTACH PARTITION nonexistent FOR VALUES IN (1); + ERROR: relation "nonexistent" does not exist + -- check ownership of the source table +-CREATE ROLE regress_test_me; +-CREATE ROLE regress_test_not_me; ++CREATE ROLE regress_test_me PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_test_not_me PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE TABLE not_owned_by_me (LIKE list_parted); + ALTER TABLE not_owned_by_me OWNER TO regress_test_not_me; + SET SESSION AUTHORIZATION regress_test_me; +diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out +index 57a283dc59..9672d526b4 100644 +--- a/src/test/regress/expected/arrays.out ++++ b/src/test/regress/expected/arrays.out +@@ -18,7 +18,8 @@ CREATE TABLE array_op_test ( + t text[] + ); + \set filename :abs_srcdir '/data/array.data' +-COPY array_op_test FROM :'filename'; ++\set command '\\copy array_op_test FROM ' :'filename'; ++:command + ANALYZE array_op_test; + -- + -- only the 'e' array is 0-based, the others are 1-based. 
+diff --git a/src/test/regress/expected/btree_index.out b/src/test/regress/expected/btree_index.out +index 93ed5e8cc0..54bd7d535c 100644 +--- a/src/test/regress/expected/btree_index.out ++++ b/src/test/regress/expected/btree_index.out +@@ -20,13 +20,17 @@ CREATE TABLE bt_f8_heap ( + random int4 + ); + \set filename :abs_srcdir '/data/desc.data' +-COPY bt_i4_heap FROM :'filename'; ++\set command '\\copy bt_i4_heap FROM ' :'filename'; ++:command + \set filename :abs_srcdir '/data/hash.data' +-COPY bt_name_heap FROM :'filename'; ++\set command '\\copy bt_name_heap FROM ' :'filename'; ++:command + \set filename :abs_srcdir '/data/desc.data' +-COPY bt_txt_heap FROM :'filename'; ++\set command '\\copy bt_txt_heap FROM ' :'filename'; ++:command + \set filename :abs_srcdir '/data/hash.data' +-COPY bt_f8_heap FROM :'filename'; ++\set command '\\copy bt_f8_heap FROM ' :'filename'; ++:command + ANALYZE bt_i4_heap; + ANALYZE bt_name_heap; + ANALYZE bt_txt_heap; +diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out +index 542c2e098c..0062d3024f 100644 +--- a/src/test/regress/expected/cluster.out ++++ b/src/test/regress/expected/cluster.out +@@ -308,7 +308,7 @@ WHERE pg_class.oid=indexrelid + -- Verify that toast tables are clusterable + CLUSTER pg_toast.pg_toast_826 USING pg_toast_826_index; + -- Verify that clustering all tables does in fact cluster the right ones +-CREATE USER regress_clstr_user; ++CREATE USER regress_clstr_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE TABLE clstr_1 (a INT PRIMARY KEY); + CREATE TABLE clstr_2 (a INT PRIMARY KEY); + CREATE TABLE clstr_3 (a INT PRIMARY KEY); +@@ -497,7 +497,7 @@ DROP TABLE clstrpart; + CREATE TABLE ptnowner(i int unique) PARTITION BY LIST (i); + CREATE INDEX ptnowner_i_idx ON ptnowner(i); + CREATE TABLE ptnowner1 PARTITION OF ptnowner FOR VALUES IN (1); +-CREATE ROLE regress_ptnowner; ++CREATE ROLE regress_ptnowner PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE TABLE ptnowner2 PARTITION 
OF ptnowner FOR VALUES IN (2); + ALTER TABLE ptnowner1 OWNER TO regress_ptnowner; + ALTER TABLE ptnowner OWNER TO regress_ptnowner; +diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out +index 97bbe53b64..eac3d42a79 100644 +--- a/src/test/regress/expected/collate.icu.utf8.out ++++ b/src/test/regress/expected/collate.icu.utf8.out +@@ -1016,7 +1016,7 @@ select * from collate_test1 where b ilike 'ABC'; + + reset enable_seqscan; + -- schema manipulation commands +-CREATE ROLE regress_test_role; ++CREATE ROLE regress_test_role PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE SCHEMA test_schema; + -- We need to do this this way to cope with varying names for encodings: + SET client_min_messages TO WARNING; +diff --git a/src/test/regress/expected/constraints.out b/src/test/regress/expected/constraints.out +index cf0b80d616..e8e2a14a4a 100644 +--- a/src/test/regress/expected/constraints.out ++++ b/src/test/regress/expected/constraints.out +@@ -349,7 +349,8 @@ CREATE TABLE COPY_TBL (x INT, y TEXT, z INT, + CONSTRAINT COPY_CON + CHECK (x > 3 AND y <> 'check failed' AND x < 7 )); + \set filename :abs_srcdir '/data/constro.data' +-COPY COPY_TBL FROM :'filename'; ++\set command '\\copy COPY_TBL FROM ' :'filename'; ++:command + SELECT * FROM COPY_TBL; + x | y | z + ---+---------------+--- +@@ -358,7 +359,8 @@ SELECT * FROM COPY_TBL; + (2 rows) + + \set filename :abs_srcdir '/data/constrf.data' +-COPY COPY_TBL FROM :'filename'; ++\set command '\\copy COPY_TBL FROM ' :'filename'; ++:command + ERROR: new row for relation "copy_tbl" violates check constraint "copy_con" + DETAIL: Failing row contains (7, check failed, 6). + CONTEXT: COPY copy_tbl, line 2: "7 check failed 6" +@@ -799,7 +801,7 @@ DETAIL: Key (f1)=(3) conflicts with key (f1)=(3). + DROP TABLE deferred_excl; + -- Comments + -- Setup a low-level role to enforce non-superuser checks. 
+-CREATE ROLE regress_constraint_comments; ++CREATE ROLE regress_constraint_comments PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION AUTHORIZATION regress_constraint_comments; + CREATE TABLE constraint_comments_tbl (a int CONSTRAINT the_constraint CHECK (a > 0)); + CREATE DOMAIN constraint_comments_dom AS int CONSTRAINT the_constraint CHECK (value > 0); +@@ -819,7 +821,7 @@ COMMENT ON CONSTRAINT the_constraint ON constraint_comments_tbl IS NULL; + COMMENT ON CONSTRAINT the_constraint ON DOMAIN constraint_comments_dom IS NULL; + -- unauthorized user + RESET SESSION AUTHORIZATION; +-CREATE ROLE regress_constraint_comments_noaccess; ++CREATE ROLE regress_constraint_comments_noaccess PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION AUTHORIZATION regress_constraint_comments_noaccess; + COMMENT ON CONSTRAINT the_constraint ON constraint_comments_tbl IS 'no, the comment'; + ERROR: must be owner of relation constraint_comments_tbl +diff --git a/src/test/regress/expected/conversion.out b/src/test/regress/expected/conversion.out +index 442e7aff2b..525f732b03 100644 +--- a/src/test/regress/expected/conversion.out ++++ b/src/test/regress/expected/conversion.out +@@ -8,7 +8,7 @@ + CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea) + AS :'regresslib', 'test_enc_conversion' + LANGUAGE C STRICT; +-CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE; ++CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION AUTHORIZATION regress_conversion_user; + CREATE CONVERSION myconv FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8; + -- +diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out +index b48365ec98..a6ef910055 100644 +--- a/src/test/regress/expected/copy.out ++++ b/src/test/regress/expected/copy.out +@@ -15,9 +15,11 @@ insert into copytest values('Unix',E'abc\ndef',2); + insert into copytest values('Mac',E'abc\rdef',3); + insert into 
copytest values(E'esc\\ape',E'a\\r\\\r\\\n\\nb',4); + \set filename :abs_builddir '/results/copytest.csv' +-copy copytest to :'filename' csv; ++\set command '\\copy copytest to ' :'filename' csv; ++:command + create temp table copytest2 (like copytest); +-copy copytest2 from :'filename' csv; ++\set command '\\copy copytest2 from ' :'filename' csv; ++:command + select * from copytest except select * from copytest2; + style | test | filler + -------+------+-------- +@@ -25,8 +27,10 @@ select * from copytest except select * from copytest2; + + truncate copytest2; + --- same test but with an escape char different from quote char +-copy copytest to :'filename' csv quote '''' escape E'\\'; +-copy copytest2 from :'filename' csv quote '''' escape E'\\'; ++\set command '\\copy copytest to ' :'filename' ' csv quote ' '\'\'\'\'' ' escape ' 'E\'' '\\\\\''; ++:command ++\set command '\\copy copytest2 from ' :'filename' ' csv quote ' '\'\'\'\'' ' escape ' 'E\'' '\\\\\''; ++:command + select * from copytest except select * from copytest2; + style | test | filler + -------+------+-------- +@@ -66,13 +70,16 @@ insert into parted_copytest select x,1,'One' from generate_series(1,1000) x; + insert into parted_copytest select x,2,'Two' from generate_series(1001,1010) x; + insert into parted_copytest select x,1,'One' from generate_series(1011,1020) x; + \set filename :abs_builddir '/results/parted_copytest.csv' +-copy (select * from parted_copytest order by a) to :'filename'; ++\set command '\\copy (select * from parted_copytest order by a) to ' :'filename'; ++:command + truncate parted_copytest; +-copy parted_copytest from :'filename'; ++\set command '\\copy parted_copytest from ' :'filename'; ++:command + -- Ensure COPY FREEZE errors for partitioned tables. 
+ begin; + truncate parted_copytest; +-copy parted_copytest from :'filename' (freeze); ++\set command '\\copy parted_copytest from ' :'filename' (freeze); ++:command + ERROR: cannot perform COPY FREEZE on a partitioned table + rollback; + select tableoid::regclass,count(*),sum(a) from parted_copytest +@@ -94,7 +101,8 @@ create trigger part_ins_trig + before insert on parted_copytest_a2 + for each row + execute procedure part_ins_func(); +-copy parted_copytest from :'filename'; ++\set command '\\copy parted_copytest from ' :'filename'; ++:command + select tableoid::regclass,count(*),sum(a) from parted_copytest + group by tableoid order by tableoid::regclass::name; + tableoid | count | sum +@@ -106,7 +114,8 @@ group by tableoid order by tableoid::regclass::name; + truncate table parted_copytest; + create index on parted_copytest (b); + drop trigger part_ins_trig on parted_copytest_a2; +-copy parted_copytest from stdin; ++\set command '\\copy parted_copytest from ' stdin; ++:command + -- Ensure index entries were properly added during the copy. + select * from parted_copytest where b = 1; + a | b | c +@@ -170,9 +179,9 @@ INFO: progress: {"type": "PIPE", "command": "COPY FROM", "relname": "tab_progre + -- Generate COPY FROM report with FILE, with some excluded tuples. 
+ truncate tab_progress_reporting; + \set filename :abs_srcdir '/data/emp.data' +-copy tab_progress_reporting from :'filename' +- where (salary < 2000); +-INFO: progress: {"type": "FILE", "command": "COPY FROM", "relname": "tab_progress_reporting", "has_bytes_total": true, "tuples_excluded": 1, "tuples_processed": 2, "has_bytes_processed": true} ++\set command '\\copy tab_progress_reporting from ' :'filename' 'where (salary < 2000)'; ++:command ++INFO: progress: {"type": "PIPE", "command": "COPY FROM", "relname": "tab_progress_reporting", "has_bytes_total": false, "tuples_excluded": 1, "tuples_processed": 2, "has_bytes_processed": true} + drop trigger check_after_tab_progress_reporting on tab_progress_reporting; + drop function notice_after_tab_progress_reporting(); + drop table tab_progress_reporting; +@@ -281,7 +290,8 @@ CREATE TABLE parted_si_p_odd PARTITION OF parted_si FOR VALUES IN (1); + -- https://postgr.es/m/18130-7a86a7356a75209d%40postgresql.org + -- https://postgr.es/m/257696.1695670946%40sss.pgh.pa.us + \set filename :abs_srcdir '/data/desc.data' +-COPY parted_si(id, data) FROM :'filename'; ++\set command '\\COPY parted_si(id, data) FROM ' :'filename'; ++:command + -- An earlier bug (see commit b1ecb9b3fcf) could end up using a buffer from + -- the wrong partition. This test is *not* guaranteed to trigger that bug, but + -- does so when shared_buffers is small enough. To test if we encountered the +diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out +index faf1a4d1b0..a44c97db52 100644 +--- a/src/test/regress/expected/copy2.out ++++ b/src/test/regress/expected/copy2.out +@@ -553,8 +553,8 @@ select * from check_con_tbl; + (2 rows) + + -- test with RLS enabled. 
+-CREATE ROLE regress_rls_copy_user; +-CREATE ROLE regress_rls_copy_user_colperms; ++CREATE ROLE regress_rls_copy_user PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_rls_copy_user_colperms PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE TABLE rls_t1 (a int, b int, c int); + COPY rls_t1 (a, b, c) from stdin; + CREATE POLICY p1 ON rls_t1 FOR SELECT USING (a % 2 = 0); +diff --git a/src/test/regress/expected/create_function_sql.out b/src/test/regress/expected/create_function_sql.out +index 50aca5940f..42527142f6 100644 +--- a/src/test/regress/expected/create_function_sql.out ++++ b/src/test/regress/expected/create_function_sql.out +@@ -4,7 +4,7 @@ + -- Assorted tests using SQL-language functions + -- + -- All objects made in this test are in temp_func_test schema +-CREATE USER regress_unpriv_user; ++CREATE USER regress_unpriv_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE SCHEMA temp_func_test; + GRANT ALL ON SCHEMA temp_func_test TO public; + SET search_path TO temp_func_test, public; +diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out +index acfd9d1f4f..0eeb64e47a 100644 +--- a/src/test/regress/expected/create_index.out ++++ b/src/test/regress/expected/create_index.out +@@ -51,7 +51,8 @@ CREATE TABLE fast_emp4000 ( + home_base box + ); + \set filename :abs_srcdir '/data/rect.data' +-COPY slow_emp4000 FROM :'filename'; ++\set command '\\copy slow_emp4000 FROM ' :'filename'; ++:command + INSERT INTO fast_emp4000 SELECT * FROM slow_emp4000; + ANALYZE slow_emp4000; + ANALYZE fast_emp4000; +@@ -655,7 +656,8 @@ CREATE TABLE array_index_op_test ( + t text[] + ); + \set filename :abs_srcdir '/data/array.data' +-COPY array_index_op_test FROM :'filename'; ++\set command '\\copy array_index_op_test FROM ' :'filename'; ++:command + ANALYZE array_index_op_test; + SELECT * FROM array_index_op_test WHERE i = '{NULL}' ORDER BY seqno; + seqno | i | t +@@ -2822,7 +2824,7 @@ END; + -- concurrently + REINDEX SCHEMA 
CONCURRENTLY schema_to_reindex; + -- Failure for unauthorized user +-CREATE ROLE regress_reindexuser NOLOGIN; ++CREATE ROLE regress_reindexuser NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION ROLE regress_reindexuser; + REINDEX SCHEMA schema_to_reindex; + ERROR: must be owner of schema schema_to_reindex +diff --git a/src/test/regress/expected/create_procedure.out b/src/test/regress/expected/create_procedure.out +index 2177ba3509..ae3ca94d00 100644 +--- a/src/test/regress/expected/create_procedure.out ++++ b/src/test/regress/expected/create_procedure.out +@@ -421,7 +421,7 @@ ERROR: cp_testfunc1(integer) is not a procedure + DROP PROCEDURE nonexistent(); + ERROR: procedure nonexistent() does not exist + -- privileges +-CREATE USER regress_cp_user1; ++CREATE USER regress_cp_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER; + GRANT INSERT ON cp_test TO regress_cp_user1; + REVOKE EXECUTE ON PROCEDURE ptest1(text) FROM PUBLIC; + SET ROLE regress_cp_user1; +diff --git a/src/test/regress/expected/create_role.out b/src/test/regress/expected/create_role.out +index 46d4f9efe9..fc2a28a2f6 100644 +--- a/src/test/regress/expected/create_role.out ++++ b/src/test/regress/expected/create_role.out +@@ -1,28 +1,28 @@ + -- ok, superuser can create users with any set of privileges +-CREATE ROLE regress_role_super SUPERUSER; +-CREATE ROLE regress_role_admin CREATEDB CREATEROLE REPLICATION BYPASSRLS; ++CREATE ROLE regress_role_super SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_role_admin CREATEDB CREATEROLE REPLICATION BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER; + GRANT CREATE ON DATABASE regression TO regress_role_admin WITH GRANT OPTION; +-CREATE ROLE regress_role_limited_admin CREATEROLE; +-CREATE ROLE regress_role_normal; ++CREATE ROLE regress_role_limited_admin CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_role_normal PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- fail, CREATEROLE user can't give away role attributes without having them + 
SET SESSION AUTHORIZATION regress_role_limited_admin; +-CREATE ROLE regress_nosuch_superuser SUPERUSER; ++CREATE ROLE regress_nosuch_superuser SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER; + ERROR: permission denied to create role + DETAIL: Only roles with the SUPERUSER attribute may create roles with the SUPERUSER attribute. +-CREATE ROLE regress_nosuch_replication_bypassrls REPLICATION BYPASSRLS; ++CREATE ROLE regress_nosuch_replication_bypassrls REPLICATION BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER; + ERROR: permission denied to create role + DETAIL: Only roles with the REPLICATION attribute may create roles with the REPLICATION attribute. +-CREATE ROLE regress_nosuch_replication REPLICATION; ++CREATE ROLE regress_nosuch_replication REPLICATION PASSWORD NEON_PASSWORD_PLACEHOLDER; + ERROR: permission denied to create role + DETAIL: Only roles with the REPLICATION attribute may create roles with the REPLICATION attribute. +-CREATE ROLE regress_nosuch_bypassrls BYPASSRLS; ++CREATE ROLE regress_nosuch_bypassrls BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER; + ERROR: permission denied to create role + DETAIL: Only roles with the BYPASSRLS attribute may create roles with the BYPASSRLS attribute. +-CREATE ROLE regress_nosuch_createdb CREATEDB; ++CREATE ROLE regress_nosuch_createdb CREATEDB PASSWORD NEON_PASSWORD_PLACEHOLDER; + ERROR: permission denied to create role + DETAIL: Only roles with the CREATEDB attribute may create roles with the CREATEDB attribute. 
+ -- ok, can create a role without any special attributes +-CREATE ROLE regress_role_limited; ++CREATE ROLE regress_role_limited PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- fail, can't give it in any of the restricted attributes + ALTER ROLE regress_role_limited SUPERUSER; + ERROR: permission denied to alter role +@@ -39,10 +39,10 @@ DETAIL: Only roles with the BYPASSRLS attribute may change the BYPASSRLS attrib + DROP ROLE regress_role_limited; + -- ok, can give away these role attributes if you have them + SET SESSION AUTHORIZATION regress_role_admin; +-CREATE ROLE regress_replication_bypassrls REPLICATION BYPASSRLS; +-CREATE ROLE regress_replication REPLICATION; +-CREATE ROLE regress_bypassrls BYPASSRLS; +-CREATE ROLE regress_createdb CREATEDB; ++CREATE ROLE regress_replication_bypassrls REPLICATION BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_replication REPLICATION PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_bypassrls BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_createdb CREATEDB PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- ok, can toggle these role attributes off and on if you have them + ALTER ROLE regress_replication NOREPLICATION; + ALTER ROLE regress_replication REPLICATION; +@@ -58,48 +58,48 @@ ALTER ROLE regress_createdb NOSUPERUSER; + ERROR: permission denied to alter role + DETAIL: Only roles with the SUPERUSER attribute may change the SUPERUSER attribute. 
+ -- ok, having CREATEROLE is enough to create users with these privileges +-CREATE ROLE regress_createrole CREATEROLE NOINHERIT; ++CREATE ROLE regress_createrole CREATEROLE NOINHERIT PASSWORD NEON_PASSWORD_PLACEHOLDER; + GRANT CREATE ON DATABASE regression TO regress_createrole WITH GRANT OPTION; +-CREATE ROLE regress_login LOGIN; +-CREATE ROLE regress_inherit INHERIT; +-CREATE ROLE regress_connection_limit CONNECTION LIMIT 5; +-CREATE ROLE regress_encrypted_password ENCRYPTED PASSWORD 'foo'; +-CREATE ROLE regress_password_null PASSWORD NULL; ++CREATE ROLE regress_login LOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_inherit INHERIT PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_connection_limit CONNECTION LIMIT 5 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_encrypted_password ENCRYPTED PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_password_null PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- ok, backwards compatible noise words should be ignored +-CREATE ROLE regress_noiseword SYSID 12345; ++CREATE ROLE regress_noiseword SYSID 12345 PASSWORD NEON_PASSWORD_PLACEHOLDER; + NOTICE: SYSID can no longer be specified + -- fail, cannot grant membership in superuser role +-CREATE ROLE regress_nosuch_super IN ROLE regress_role_super; ++CREATE ROLE regress_nosuch_super IN ROLE regress_role_super PASSWORD NEON_PASSWORD_PLACEHOLDER; + ERROR: permission denied to grant role "regress_role_super" + DETAIL: Only roles with the SUPERUSER attribute may grant roles with the SUPERUSER attribute. 
+ -- fail, database owner cannot have members +-CREATE ROLE regress_nosuch_dbowner IN ROLE pg_database_owner; ++CREATE ROLE regress_nosuch_dbowner IN ROLE pg_database_owner PASSWORD NEON_PASSWORD_PLACEHOLDER; + ERROR: role "pg_database_owner" cannot have explicit members + -- ok, can grant other users into a role + CREATE ROLE regress_inroles ROLE + regress_role_super, regress_createdb, regress_createrole, regress_login, +- regress_inherit, regress_connection_limit, regress_encrypted_password, regress_password_null; ++ regress_inherit, regress_connection_limit, regress_encrypted_password, regress_password_null PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- fail, cannot grant a role into itself +-CREATE ROLE regress_nosuch_recursive ROLE regress_nosuch_recursive; ++CREATE ROLE regress_nosuch_recursive ROLE regress_nosuch_recursive PASSWORD NEON_PASSWORD_PLACEHOLDER; + ERROR: role "regress_nosuch_recursive" is a member of role "regress_nosuch_recursive" + -- ok, can grant other users into a role with admin option + CREATE ROLE regress_adminroles ADMIN + regress_role_super, regress_createdb, regress_createrole, regress_login, +- regress_inherit, regress_connection_limit, regress_encrypted_password, regress_password_null; ++ regress_inherit, regress_connection_limit, regress_encrypted_password, regress_password_null PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- fail, cannot grant a role into itself with admin option +-CREATE ROLE regress_nosuch_admin_recursive ADMIN regress_nosuch_admin_recursive; ++CREATE ROLE regress_nosuch_admin_recursive ADMIN regress_nosuch_admin_recursive PASSWORD NEON_PASSWORD_PLACEHOLDER; + ERROR: role "regress_nosuch_admin_recursive" is a member of role "regress_nosuch_admin_recursive" + -- fail, regress_createrole does not have CREATEDB privilege + SET SESSION AUTHORIZATION regress_createrole; + CREATE DATABASE regress_nosuch_db; + ERROR: permission denied to create database + -- ok, regress_createrole can create new roles +-CREATE ROLE 
regress_plainrole; ++CREATE ROLE regress_plainrole PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- ok, roles with CREATEROLE can create new roles with it +-CREATE ROLE regress_rolecreator CREATEROLE; ++CREATE ROLE regress_rolecreator CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- ok, roles with CREATEROLE can create new roles with different role + -- attributes, including CREATEROLE +-CREATE ROLE regress_hasprivs CREATEROLE LOGIN INHERIT CONNECTION LIMIT 5; ++CREATE ROLE regress_hasprivs CREATEROLE LOGIN INHERIT CONNECTION LIMIT 5 PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- ok, we should be able to modify a role we created + COMMENT ON ROLE regress_hasprivs IS 'some comment'; + ALTER ROLE regress_hasprivs RENAME TO regress_tenant; +@@ -141,7 +141,7 @@ ERROR: permission denied to reassign objects + DETAIL: Only roles with privileges of role "regress_tenant" may reassign objects owned by it. + -- ok, create a role with a value for createrole_self_grant + SET createrole_self_grant = 'set, inherit'; +-CREATE ROLE regress_tenant2; ++CREATE ROLE regress_tenant2 PASSWORD NEON_PASSWORD_PLACEHOLDER; + GRANT CREATE ON DATABASE regression TO regress_tenant2; + -- ok, regress_tenant2 can create objects within the database + SET SESSION AUTHORIZATION regress_tenant2; +@@ -165,34 +165,34 @@ ALTER TABLE tenant2_table OWNER TO regress_tenant2; + ERROR: must be able to SET ROLE "regress_tenant2" + DROP TABLE tenant2_table; + -- fail, CREATEROLE is not enough to create roles in privileged roles +-CREATE ROLE regress_read_all_data IN ROLE pg_read_all_data; ++CREATE ROLE regress_read_all_data PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_all_data; + ERROR: permission denied to grant role "pg_read_all_data" + DETAIL: Only roles with the ADMIN option on role "pg_read_all_data" may grant this role. 
+-CREATE ROLE regress_write_all_data IN ROLE pg_write_all_data; ++CREATE ROLE regress_write_all_data PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_write_all_data; + ERROR: permission denied to grant role "pg_write_all_data" + DETAIL: Only roles with the ADMIN option on role "pg_write_all_data" may grant this role. +-CREATE ROLE regress_monitor IN ROLE pg_monitor; ++CREATE ROLE regress_monitor PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_monitor; + ERROR: permission denied to grant role "pg_monitor" + DETAIL: Only roles with the ADMIN option on role "pg_monitor" may grant this role. +-CREATE ROLE regress_read_all_settings IN ROLE pg_read_all_settings; ++CREATE ROLE regress_read_all_settings PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_all_settings; + ERROR: permission denied to grant role "pg_read_all_settings" + DETAIL: Only roles with the ADMIN option on role "pg_read_all_settings" may grant this role. +-CREATE ROLE regress_read_all_stats IN ROLE pg_read_all_stats; ++CREATE ROLE regress_read_all_stats PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_all_stats; + ERROR: permission denied to grant role "pg_read_all_stats" + DETAIL: Only roles with the ADMIN option on role "pg_read_all_stats" may grant this role. +-CREATE ROLE regress_stat_scan_tables IN ROLE pg_stat_scan_tables; ++CREATE ROLE regress_stat_scan_tables PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_stat_scan_tables; + ERROR: permission denied to grant role "pg_stat_scan_tables" + DETAIL: Only roles with the ADMIN option on role "pg_stat_scan_tables" may grant this role. +-CREATE ROLE regress_read_server_files IN ROLE pg_read_server_files; ++CREATE ROLE regress_read_server_files PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_server_files; + ERROR: permission denied to grant role "pg_read_server_files" + DETAIL: Only roles with the ADMIN option on role "pg_read_server_files" may grant this role. 
+-CREATE ROLE regress_write_server_files IN ROLE pg_write_server_files; ++CREATE ROLE regress_write_server_files PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_write_server_files; + ERROR: permission denied to grant role "pg_write_server_files" + DETAIL: Only roles with the ADMIN option on role "pg_write_server_files" may grant this role. +-CREATE ROLE regress_execute_server_program IN ROLE pg_execute_server_program; ++CREATE ROLE regress_execute_server_program PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_execute_server_program; + ERROR: permission denied to grant role "pg_execute_server_program" + DETAIL: Only roles with the ADMIN option on role "pg_execute_server_program" may grant this role. +-CREATE ROLE regress_signal_backend IN ROLE pg_signal_backend; ++CREATE ROLE regress_signal_backend PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_signal_backend; + ERROR: permission denied to grant role "pg_signal_backend" + DETAIL: Only roles with the ADMIN option on role "pg_signal_backend" may grant this role. + -- fail, role still owns database objects +diff --git a/src/test/regress/expected/create_schema.out b/src/test/regress/expected/create_schema.out +index 93302a07ef..1a73f083ac 100644 +--- a/src/test/regress/expected/create_schema.out ++++ b/src/test/regress/expected/create_schema.out +@@ -2,7 +2,7 @@ + -- CREATE_SCHEMA + -- + -- Schema creation with elements. +-CREATE ROLE regress_create_schema_role SUPERUSER; ++CREATE ROLE regress_create_schema_role SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- Cases where schema creation fails as objects are qualified with a schema + -- that does not match with what's expected. + -- This checks all the object types that include schema qualifications. 
+diff --git a/src/test/regress/expected/create_view.out b/src/test/regress/expected/create_view.out +index f3f8c7b5a2..3e3e54ff4c 100644 +--- a/src/test/regress/expected/create_view.out ++++ b/src/test/regress/expected/create_view.out +@@ -18,7 +18,8 @@ CREATE TABLE real_city ( + outline path + ); + \set filename :abs_srcdir '/data/real_city.data' +-COPY real_city FROM :'filename'; ++\set command '\\copy real_city FROM ' :'filename'; ++:command + ANALYZE real_city; + SELECT * + INTO TABLE ramp +diff --git a/src/test/regress/expected/database.out b/src/test/regress/expected/database.out +index 454db91ec0..01378d7081 100644 +--- a/src/test/regress/expected/database.out ++++ b/src/test/regress/expected/database.out +@@ -1,8 +1,7 @@ + CREATE DATABASE regression_tbd + ENCODING utf8 LC_COLLATE "C" LC_CTYPE "C" TEMPLATE template0; + ALTER DATABASE regression_tbd RENAME TO regression_utf8; +-ALTER DATABASE regression_utf8 SET TABLESPACE regress_tblspace; +-ALTER DATABASE regression_utf8 RESET TABLESPACE; ++WARNING: you need to manually restart any running background workers after this command + ALTER DATABASE regression_utf8 CONNECTION_LIMIT 123; + -- Test PgDatabaseToastTable. Doing this with GRANT would be slow. 
+ BEGIN; +diff --git a/src/test/regress/expected/dependency.out b/src/test/regress/expected/dependency.out +index 6d9498cdd1..692cf979d0 100644 +--- a/src/test/regress/expected/dependency.out ++++ b/src/test/regress/expected/dependency.out +@@ -1,10 +1,10 @@ + -- + -- DEPENDENCIES + -- +-CREATE USER regress_dep_user; +-CREATE USER regress_dep_user2; +-CREATE USER regress_dep_user3; +-CREATE GROUP regress_dep_group; ++CREATE USER regress_dep_user PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_dep_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_dep_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE GROUP regress_dep_group PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE TABLE deptest (f1 serial primary key, f2 text); + GRANT SELECT ON TABLE deptest TO GROUP regress_dep_group; + GRANT ALL ON TABLE deptest TO regress_dep_user, regress_dep_user2; +@@ -41,9 +41,9 @@ ERROR: role "regress_dep_user3" cannot be dropped because some objects depend o + DROP TABLE deptest; + DROP USER regress_dep_user3; + -- Test DROP OWNED +-CREATE USER regress_dep_user0; +-CREATE USER regress_dep_user1; +-CREATE USER regress_dep_user2; ++CREATE USER regress_dep_user0 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_dep_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_dep_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION AUTHORIZATION regress_dep_user0; + -- permission denied + DROP OWNED BY regress_dep_user1; +diff --git a/src/test/regress/expected/drop_if_exists.out b/src/test/regress/expected/drop_if_exists.out +index 5e44c2c3ce..eb3bb329fb 100644 +--- a/src/test/regress/expected/drop_if_exists.out ++++ b/src/test/regress/expected/drop_if_exists.out +@@ -64,9 +64,9 @@ ERROR: type "test_domain_exists" does not exist + --- + --- role/user/group + --- +-CREATE USER regress_test_u1; +-CREATE ROLE regress_test_r1; +-CREATE GROUP regress_test_g1; ++CREATE USER regress_test_u1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_test_r1 
PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE GROUP regress_test_g1 PASSWORD NEON_PASSWORD_PLACEHOLDER; + DROP USER regress_test_u2; + ERROR: role "regress_test_u2" does not exist + DROP USER IF EXISTS regress_test_u1, regress_test_u2; +diff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out +index 126f7047fe..0e2cc73426 100644 +--- a/src/test/regress/expected/equivclass.out ++++ b/src/test/regress/expected/equivclass.out +@@ -384,7 +384,7 @@ set enable_nestloop = on; + set enable_mergejoin = off; + alter table ec1 enable row level security; + create policy p1 on ec1 using (f1 < '5'::int8alias1); +-create user regress_user_ectest; ++create user regress_user_ectest PASSWORD NEON_PASSWORD_PLACEHOLDER; + grant select on ec0 to regress_user_ectest; + grant select on ec1 to regress_user_ectest; + -- without any RLS, we'll treat {a.ff, b.ff, 43} as an EquivalenceClass +diff --git a/src/test/regress/expected/event_trigger.out b/src/test/regress/expected/event_trigger.out +index 5a10958df5..a578c06ebd 100644 +--- a/src/test/regress/expected/event_trigger.out ++++ b/src/test/regress/expected/event_trigger.out +@@ -85,7 +85,7 @@ create event trigger regress_event_trigger2 on ddl_command_start + -- OK + comment on event trigger regress_event_trigger is 'test comment'; + -- drop as non-superuser should fail +-create role regress_evt_user; ++create role regress_evt_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + set role regress_evt_user; + create event trigger regress_event_trigger_noperms on ddl_command_start + execute procedure test_event_trigger(); +diff --git a/src/test/regress/expected/foreign_data.out b/src/test/regress/expected/foreign_data.out +index 6ed50fdcfa..caa00a345d 100644 +--- a/src/test/regress/expected/foreign_data.out ++++ b/src/test/regress/expected/foreign_data.out +@@ -14,13 +14,13 @@ CREATE FUNCTION test_fdw_handler() + SET client_min_messages TO 'warning'; + DROP ROLE IF EXISTS regress_foreign_data_user, 
regress_test_role, regress_test_role2, regress_test_role_super, regress_test_indirect, regress_unprivileged_role; + RESET client_min_messages; +-CREATE ROLE regress_foreign_data_user LOGIN SUPERUSER; ++CREATE ROLE regress_foreign_data_user LOGIN SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION AUTHORIZATION 'regress_foreign_data_user'; +-CREATE ROLE regress_test_role; +-CREATE ROLE regress_test_role2; +-CREATE ROLE regress_test_role_super SUPERUSER; +-CREATE ROLE regress_test_indirect; +-CREATE ROLE regress_unprivileged_role; ++CREATE ROLE regress_test_role PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_test_role2 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_test_role_super SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_test_indirect PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_unprivileged_role PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE FOREIGN DATA WRAPPER dummy; + COMMENT ON FOREIGN DATA WRAPPER dummy IS 'useless'; + CREATE FOREIGN DATA WRAPPER postgresql VALIDATOR postgresql_fdw_validator; +diff --git a/src/test/regress/expected/foreign_key.out b/src/test/regress/expected/foreign_key.out +index 12e523c737..8872e23935 100644 +--- a/src/test/regress/expected/foreign_key.out ++++ b/src/test/regress/expected/foreign_key.out +@@ -1968,7 +1968,7 @@ ALTER TABLE fk_partitioned_fk ATTACH PARTITION fk_partitioned_fk_2 + FOR VALUES IN (1600); + -- leave these tables around intentionally + -- test the case when the referenced table is owned by a different user +-create role regress_other_partitioned_fk_owner; ++create role regress_other_partitioned_fk_owner PASSWORD NEON_PASSWORD_PLACEHOLDER; + grant references on fk_notpartitioned_pk to regress_other_partitioned_fk_owner; + set role regress_other_partitioned_fk_owner; + create table other_partitioned_fk(a int, b int) partition by list (a); +diff --git a/src/test/regress/expected/generated.out b/src/test/regress/expected/generated.out +index 
0f623f7119..b48588a54e 100644 +--- a/src/test/regress/expected/generated.out ++++ b/src/test/regress/expected/generated.out +@@ -534,7 +534,7 @@ CREATE TABLE gtest10a (a int PRIMARY KEY, b int GENERATED ALWAYS AS (a * 2) STOR + ALTER TABLE gtest10a DROP COLUMN b; + INSERT INTO gtest10a (a) VALUES (1); + -- privileges +-CREATE USER regress_user11; ++CREATE USER regress_user11 PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE TABLE gtest11s (a int PRIMARY KEY, b int, c int GENERATED ALWAYS AS (b * 2) STORED); + INSERT INTO gtest11s VALUES (1, 10), (2, 20); + GRANT SELECT (a, c) ON gtest11s TO regress_user11; +diff --git a/src/test/regress/expected/guc.out b/src/test/regress/expected/guc.out +index 127c953297..e6f8272f99 100644 +--- a/src/test/regress/expected/guc.out ++++ b/src/test/regress/expected/guc.out +@@ -584,7 +584,7 @@ PREPARE foo AS SELECT 1; + LISTEN foo_event; + SET vacuum_cost_delay = 13; + CREATE TEMP TABLE tmp_foo (data text) ON COMMIT DELETE ROWS; +-CREATE ROLE regress_guc_user; ++CREATE ROLE regress_guc_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION AUTHORIZATION regress_guc_user; + -- look changes + SELECT pg_listening_channels(); +diff --git a/src/test/regress/expected/hash_index.out b/src/test/regress/expected/hash_index.out +index a2036a1597..805d73b9d2 100644 +--- a/src/test/regress/expected/hash_index.out ++++ b/src/test/regress/expected/hash_index.out +@@ -20,10 +20,14 @@ CREATE TABLE hash_f8_heap ( + random float8 + ); + \set filename :abs_srcdir '/data/hash.data' +-COPY hash_i4_heap FROM :'filename'; +-COPY hash_name_heap FROM :'filename'; +-COPY hash_txt_heap FROM :'filename'; +-COPY hash_f8_heap FROM :'filename'; ++\set command '\\copy hash_i4_heap FROM ' :'filename'; ++:command ++\set command '\\copy hash_name_heap FROM ' :'filename'; ++:command ++\set command '\\copy hash_txt_heap FROM ' :'filename'; ++:command ++\set command '\\copy hash_f8_heap FROM ' :'filename'; ++:command + -- the data in this file has a lot of duplicates in the 
index key + -- fields, leading to long bucket chains and lots of table expansion. + -- this is therefore a stress test of the bucket overflow code (unlike +diff --git a/src/test/regress/expected/identity.out b/src/test/regress/expected/identity.out +index cc7772349f..98a08eb48d 100644 +--- a/src/test/regress/expected/identity.out ++++ b/src/test/regress/expected/identity.out +@@ -520,7 +520,7 @@ ALTER TABLE itest7 ALTER COLUMN a SET GENERATED BY DEFAULT; + ALTER TABLE itest7 ALTER COLUMN a RESTART; + ALTER TABLE itest7 ALTER COLUMN a DROP IDENTITY; + -- privileges +-CREATE USER regress_identity_user1; ++CREATE USER regress_identity_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE TABLE itest8 (a int GENERATED ALWAYS AS IDENTITY, b text); + GRANT SELECT, INSERT ON itest8 TO regress_identity_user1; + SET ROLE regress_identity_user1; +diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out +index 4943429e9b..0257f22b15 100644 +--- a/src/test/regress/expected/inherit.out ++++ b/src/test/regress/expected/inherit.out +@@ -2606,7 +2606,7 @@ create index on permtest_parent (left(c, 3)); + insert into permtest_parent + select 1, 'a', left(fipshash(i::text), 5) from generate_series(0, 100) i; + analyze permtest_parent; +-create role regress_no_child_access; ++create role regress_no_child_access PASSWORD NEON_PASSWORD_PLACEHOLDER; + revoke all on permtest_grandchild from regress_no_child_access; + grant select on permtest_parent to regress_no_child_access; + set session authorization regress_no_child_access; +diff --git a/src/test/regress/expected/insert.out b/src/test/regress/expected/insert.out +index cf4b5221a8..fa6ccb639c 100644 +--- a/src/test/regress/expected/insert.out ++++ b/src/test/regress/expected/insert.out +@@ -802,7 +802,7 @@ drop table mlparted5; + -- appropriate key description (or none) in various situations + create table key_desc (a int, b int) partition by list ((a+0)); + create table key_desc_1 partition of key_desc 
for values in (1) partition by range (b); +-create user regress_insert_other_user; ++create user regress_insert_other_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + grant select (a) on key_desc_1 to regress_insert_other_user; + grant insert on key_desc to regress_insert_other_user; + set role regress_insert_other_user; +@@ -914,7 +914,7 @@ DETAIL: Failing row contains (2, hi there). + -- check that the message shows the appropriate column description in a + -- situation where the partitioned table is not the primary ModifyTable node + create table inserttest3 (f1 text default 'foo', f2 text default 'bar', f3 int); +-create role regress_coldesc_role; ++create role regress_coldesc_role PASSWORD NEON_PASSWORD_PLACEHOLDER; + grant insert on inserttest3 to regress_coldesc_role; + grant insert on brtrigpartcon to regress_coldesc_role; + revoke select on brtrigpartcon from regress_coldesc_role; +diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out +index f8a7dac960..64dcaf171c 100644 +--- a/src/test/regress/expected/jsonb.out ++++ b/src/test/regress/expected/jsonb.out +@@ -4,7 +4,8 @@ CREATE TABLE testjsonb ( + j jsonb + ); + \set filename :abs_srcdir '/data/jsonb.data' +-COPY testjsonb FROM :'filename'; ++\set command '\\copy testjsonb FROM ' :'filename'; ++:command + -- Strings. + SELECT '""'::jsonb; -- OK. 
+ jsonb +diff --git a/src/test/regress/expected/largeobject.out b/src/test/regress/expected/largeobject.out +index 4921dd79ae..d18a3cdd66 100644 +--- a/src/test/regress/expected/largeobject.out ++++ b/src/test/regress/expected/largeobject.out +@@ -7,7 +7,7 @@ + -- ensure consistent test output regardless of the default bytea format + SET bytea_output TO escape; + -- Test ALTER LARGE OBJECT OWNER +-CREATE ROLE regress_lo_user; ++CREATE ROLE regress_lo_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + SELECT lo_create(42); + lo_create + ----------- +@@ -346,7 +346,8 @@ SELECT lo_unlink(loid) from lotest_stash_values; + + TRUNCATE lotest_stash_values; + \set filename :abs_srcdir '/data/tenk.data' +-INSERT INTO lotest_stash_values (loid) SELECT lo_import(:'filename'); ++\lo_import :filename ++INSERT INTO lotest_stash_values (loid) VALUES (:LASTOID); + BEGIN; + UPDATE lotest_stash_values SET fd=lo_open(loid, CAST(x'20000' | x'40000' AS integer)); + -- verify length of large object +@@ -410,12 +411,8 @@ SELECT lo_close(fd) FROM lotest_stash_values; + + END; + \set filename :abs_builddir '/results/lotest.txt' +-SELECT lo_export(loid, :'filename') FROM lotest_stash_values; +- lo_export +------------ +- 1 +-(1 row) +- ++SELECT loid FROM lotest_stash_values \gset ++\lo_export :loid, :filename + \lo_import :filename + \set newloid :LASTOID + -- just make sure \lo_export does not barf +diff --git a/src/test/regress/expected/lock.out b/src/test/regress/expected/lock.out +index ad137d3645..8dac447436 100644 +--- a/src/test/regress/expected/lock.out ++++ b/src/test/regress/expected/lock.out +@@ -16,7 +16,7 @@ CREATE VIEW lock_view3 AS SELECT * from lock_view2; + CREATE VIEW lock_view4 AS SELECT (select a from lock_tbl1a limit 1) from lock_tbl1; + CREATE VIEW lock_view5 AS SELECT * from lock_tbl1 where a in (select * from lock_tbl1a); + CREATE VIEW lock_view6 AS SELECT * from (select * from lock_tbl1) sub; +-CREATE ROLE regress_rol_lock1; ++CREATE ROLE regress_rol_lock1 PASSWORD 
NEON_PASSWORD_PLACEHOLDER; + ALTER ROLE regress_rol_lock1 SET search_path = lock_schema1; + GRANT USAGE ON SCHEMA lock_schema1 TO regress_rol_lock1; + -- Try all valid lock options; also try omitting the optional TABLE keyword. +diff --git a/src/test/regress/expected/matview.out b/src/test/regress/expected/matview.out +index 67a50bde3d..7eeafd2603 100644 +--- a/src/test/regress/expected/matview.out ++++ b/src/test/regress/expected/matview.out +@@ -549,7 +549,7 @@ SELECT * FROM mvtest_mv_v; + DROP TABLE mvtest_v CASCADE; + NOTICE: drop cascades to materialized view mvtest_mv_v + -- make sure running as superuser works when MV owned by another role (bug #11208) +-CREATE ROLE regress_user_mvtest; ++CREATE ROLE regress_user_mvtest PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET ROLE regress_user_mvtest; + -- this test case also checks for ambiguity in the queries issued by + -- refresh_by_match_merge(), by choosing column names that intentionally +@@ -615,7 +615,7 @@ HINT: Use the REFRESH MATERIALIZED VIEW command. + ROLLBACK; + -- INSERT privileges if relation owner is not allowed to insert. 
+ CREATE SCHEMA matview_schema; +-CREATE USER regress_matview_user; ++CREATE USER regress_matview_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + ALTER DEFAULT PRIVILEGES FOR ROLE regress_matview_user + REVOKE INSERT ON TABLES FROM regress_matview_user; + GRANT ALL ON SCHEMA matview_schema TO public; +diff --git a/src/test/regress/expected/merge.out b/src/test/regress/expected/merge.out +index bc9a59803f..5b9ddf0626 100644 +--- a/src/test/regress/expected/merge.out ++++ b/src/test/regress/expected/merge.out +@@ -1,9 +1,9 @@ + -- + -- MERGE + -- +-CREATE USER regress_merge_privs; +-CREATE USER regress_merge_no_privs; +-CREATE USER regress_merge_none; ++CREATE USER regress_merge_privs PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_merge_no_privs PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_merge_none PASSWORD NEON_PASSWORD_PLACEHOLDER; + DROP TABLE IF EXISTS target; + NOTICE: table "target" does not exist, skipping + DROP TABLE IF EXISTS source; +diff --git a/src/test/regress/expected/misc.out b/src/test/regress/expected/misc.out +index 6e816c57f1..6ef45b468e 100644 +--- a/src/test/regress/expected/misc.out ++++ b/src/test/regress/expected/misc.out +@@ -59,9 +59,11 @@ DROP TABLE tmp; + -- copy + -- + \set filename :abs_builddir '/results/onek.data' +-COPY onek TO :'filename'; ++\set command '\\copy onek TO ' :'filename'; ++:command + CREATE TEMP TABLE onek_copy (LIKE onek); +-COPY onek_copy FROM :'filename'; ++\set command '\\copy onek_copy FROM ' :'filename'; ++:command + SELECT * FROM onek EXCEPT ALL SELECT * FROM onek_copy; + unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4 + ---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+--------- +@@ -73,9 +75,11 @@ SELECT * FROM onek_copy EXCEPT ALL SELECT * FROM onek; + (0 rows) + + \set filename :abs_builddir 
'/results/stud_emp.data' +-COPY BINARY stud_emp TO :'filename'; ++\set command '\\COPY BINARY stud_emp TO ' :'filename'; ++:command + CREATE TEMP TABLE stud_emp_copy (LIKE stud_emp); +-COPY BINARY stud_emp_copy FROM :'filename'; ++\set command '\\COPY BINARY stud_emp_copy FROM ' :'filename'; ++:command + SELECT * FROM stud_emp_copy; + name | age | location | salary | manager | gpa | percent + -------+-----+------------+--------+---------+-----+--------- +diff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out +index c669948370..47111b1d24 100644 +--- a/src/test/regress/expected/misc_functions.out ++++ b/src/test/regress/expected/misc_functions.out +@@ -297,7 +297,7 @@ SELECT pg_log_backend_memory_contexts(pid) FROM pg_stat_activity + t + (1 row) + +-CREATE ROLE regress_log_memory; ++CREATE ROLE regress_log_memory PASSWORD NEON_PASSWORD_PLACEHOLDER; + SELECT has_function_privilege('regress_log_memory', + 'pg_log_backend_memory_contexts(integer)', 'EXECUTE'); -- no + has_function_privilege +@@ -483,7 +483,7 @@ select count(*) > 0 from + -- + -- Test replication slot directory functions + -- +-CREATE ROLE regress_slot_dir_funcs; ++CREATE ROLE regress_slot_dir_funcs PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- Not available by default. 
+ SELECT has_function_privilege('regress_slot_dir_funcs', + 'pg_ls_logicalsnapdir()', 'EXECUTE'); +diff --git a/src/test/regress/expected/object_address.out b/src/test/regress/expected/object_address.out +index fc42d418bf..e38f517574 100644 +--- a/src/test/regress/expected/object_address.out ++++ b/src/test/regress/expected/object_address.out +@@ -5,7 +5,7 @@ + SET client_min_messages TO 'warning'; + DROP ROLE IF EXISTS regress_addr_user; + RESET client_min_messages; +-CREATE USER regress_addr_user; ++CREATE USER regress_addr_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- Test generic object addressing/identification functions + CREATE SCHEMA addr_nsp; + SET search_path TO 'addr_nsp'; +diff --git a/src/test/regress/expected/password.out b/src/test/regress/expected/password.out +index 8475231735..1afae5395f 100644 +--- a/src/test/regress/expected/password.out ++++ b/src/test/regress/expected/password.out +@@ -12,11 +12,11 @@ SET password_encryption = 'md5'; -- ok + SET password_encryption = 'scram-sha-256'; -- ok + -- consistency of password entries + SET password_encryption = 'md5'; +-CREATE ROLE regress_passwd1 PASSWORD 'role_pwd1'; +-CREATE ROLE regress_passwd2 PASSWORD 'role_pwd2'; ++CREATE ROLE regress_passwd1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET password_encryption = 'scram-sha-256'; +-CREATE ROLE regress_passwd3 PASSWORD 'role_pwd3'; +-CREATE ROLE regress_passwd4 PASSWORD NULL; ++CREATE ROLE regress_passwd3 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- check list of created entries + -- + -- The scram secret will look something like: +@@ -30,10 +30,10 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+ + ORDER BY rolname, rolpassword; + rolname | rolpassword_masked + -----------------+--------------------------------------------------- +- regress_passwd1 | md5783277baca28003b33453252be4dbb34 +- 
regress_passwd2 | md54044304ba511dd062133eb5b4b84a2a3 ++ regress_passwd1 | NEON_MD5_PLACEHOLDER_regress_passwd1 ++ regress_passwd2 | NEON_MD5_PLACEHOLDER_regress_passwd2 + regress_passwd3 | SCRAM-SHA-256$4096:$: +- regress_passwd4 | ++ regress_passwd4 | SCRAM-SHA-256$4096:$: + (4 rows) + + -- Rename a role +@@ -54,24 +54,30 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2; + -- passwords. + SET password_encryption = 'md5'; + -- encrypt with MD5 +-ALTER ROLE regress_passwd2 PASSWORD 'foo'; ++ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- already encrypted, use as they are + ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70'; ++ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} + ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo='; ++ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} + SET password_encryption = 'scram-sha-256'; + -- create SCRAM secret +-ALTER ROLE regress_passwd4 PASSWORD 'foo'; ++ALTER ROLE regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- already encrypted with MD5, use as it is + CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023'; ++ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} + -- This looks like a valid SCRAM-SHA-256 secret, but it is not + -- so it should be hashed with SCRAM-SHA-256. + CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234'; ++ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} + -- These may look like valid MD5 secrets, but they are not, so they + -- should be hashed with SCRAM-SHA-256. 
+ -- trailing garbage at the end + CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz'; ++ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} + -- invalid length + CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz'; ++ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} + -- Changing the SCRAM iteration count + SET scram_iterations = 1024; + CREATE ROLE regress_passwd9 PASSWORD 'alterediterationcount'; +@@ -81,63 +87,67 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+ + ORDER BY rolname, rolpassword; + rolname | rolpassword_masked + -----------------+--------------------------------------------------- +- regress_passwd1 | md5cd3578025fe2c3d7ed1b9a9b26238b70 +- regress_passwd2 | md5dfa155cadd5f4ad57860162f3fab9cdb ++ regress_passwd1 | NEON_MD5_PLACEHOLDER_regress_passwd1 ++ regress_passwd2 | NEON_MD5_PLACEHOLDER_regress_passwd2 + regress_passwd3 | SCRAM-SHA-256$4096:$: + regress_passwd4 | SCRAM-SHA-256$4096:$: +- regress_passwd5 | md5e73a4b11df52a6068f8b39f90be36023 +- regress_passwd6 | SCRAM-SHA-256$4096:$: +- regress_passwd7 | SCRAM-SHA-256$4096:$: +- regress_passwd8 | SCRAM-SHA-256$4096:$: + regress_passwd9 | SCRAM-SHA-256$1024:$: +-(9 rows) ++(5 rows) + + -- An empty password is not allowed, in any form + CREATE ROLE regress_passwd_empty PASSWORD ''; + NOTICE: empty string is not a valid password, clearing password ++ERROR: Failed to get encrypted password: User "regress_passwd_empty" has no password assigned. 
+ ALTER ROLE regress_passwd_empty PASSWORD 'md585939a5ce845f1a1b620742e3c659e0a'; +-NOTICE: empty string is not a valid password, clearing password ++ERROR: role "regress_passwd_empty" does not exist + ALTER ROLE regress_passwd_empty PASSWORD 'SCRAM-SHA-256$4096:hpFyHTUsSWcR7O9P$LgZFIt6Oqdo27ZFKbZ2nV+vtnYM995pDh9ca6WSi120=:qVV5NeluNfUPkwm7Vqat25RjSPLkGeoZBQs6wVv+um4='; +-NOTICE: empty string is not a valid password, clearing password ++ERROR: role "regress_passwd_empty" does not exist + SELECT rolpassword FROM pg_authid WHERE rolname='regress_passwd_empty'; + rolpassword + ------------- +- +-(1 row) ++(0 rows) + + -- Test with invalid stored and server keys. + -- + -- The first is valid, to act as a control. The others have too long + -- stored/server keys. They will be re-hashed. + CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; ++ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} + CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI='; ++ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} + CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA='; ++ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"} + -- Check that the invalid secrets were re-hashed. A re-hashed secret + -- should not contain the original salt. 
+ SELECT rolname, rolpassword not like '%A6xHKoH/494E941doaPOYg==%' as is_rolpassword_rehashed + FROM pg_authid + WHERE rolname LIKE 'regress_passwd_sha_len%' + ORDER BY rolname; +- rolname | is_rolpassword_rehashed +--------------------------+------------------------- +- regress_passwd_sha_len0 | f +- regress_passwd_sha_len1 | t +- regress_passwd_sha_len2 | t +-(3 rows) ++ rolname | is_rolpassword_rehashed ++---------+------------------------- ++(0 rows) + + DROP ROLE regress_passwd1; + DROP ROLE regress_passwd2; + DROP ROLE regress_passwd3; + DROP ROLE regress_passwd4; + DROP ROLE regress_passwd5; ++ERROR: role "regress_passwd5" does not exist + DROP ROLE regress_passwd6; ++ERROR: role "regress_passwd6" does not exist + DROP ROLE regress_passwd7; ++ERROR: role "regress_passwd7" does not exist + DROP ROLE regress_passwd8; ++ERROR: role "regress_passwd8" does not exist + DROP ROLE regress_passwd9; + DROP ROLE regress_passwd_empty; ++ERROR: role "regress_passwd_empty" does not exist + DROP ROLE regress_passwd_sha_len0; ++ERROR: role "regress_passwd_sha_len0" does not exist + DROP ROLE regress_passwd_sha_len1; ++ERROR: role "regress_passwd_sha_len1" does not exist + DROP ROLE regress_passwd_sha_len2; ++ERROR: role "regress_passwd_sha_len2" does not exist + -- all entries should have been removed + SELECT rolname, rolpassword + FROM pg_authid +diff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out +index fbb0489a4f..2905194e2c 100644 +--- a/src/test/regress/expected/privileges.out ++++ b/src/test/regress/expected/privileges.out +@@ -20,19 +20,19 @@ SELECT lo_unlink(oid) FROM pg_largeobject_metadata WHERE oid >= 1000 AND oid < 3 + + RESET client_min_messages; + -- test proper begins here +-CREATE USER regress_priv_user1; +-CREATE USER regress_priv_user2; +-CREATE USER regress_priv_user3; +-CREATE USER regress_priv_user4; +-CREATE USER regress_priv_user5; +-CREATE USER regress_priv_user5; -- duplicate ++CREATE USER 
regress_priv_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user4 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER; -- duplicate + ERROR: role "regress_priv_user5" already exists +-CREATE USER regress_priv_user6; +-CREATE USER regress_priv_user7; +-CREATE USER regress_priv_user8; +-CREATE USER regress_priv_user9; +-CREATE USER regress_priv_user10; +-CREATE ROLE regress_priv_role; ++CREATE USER regress_priv_user6 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user7 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user8 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user9 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user10 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_priv_role PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- circular ADMIN OPTION grants should be disallowed + GRANT regress_priv_user1 TO regress_priv_user2 WITH ADMIN OPTION; + GRANT regress_priv_user1 TO regress_priv_user3 WITH ADMIN OPTION GRANTED BY regress_priv_user2; +@@ -108,11 +108,11 @@ ERROR: role "regress_priv_user5" cannot be dropped because some objects depend + DETAIL: privileges for membership of role regress_priv_user6 in role regress_priv_user1 + DROP ROLE regress_priv_user1, regress_priv_user5; -- ok, despite order + -- recreate the roles we just dropped +-CREATE USER regress_priv_user1; +-CREATE USER regress_priv_user2; +-CREATE USER regress_priv_user3; +-CREATE USER regress_priv_user4; +-CREATE USER regress_priv_user5; ++CREATE USER regress_priv_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER 
regress_priv_user4 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER; + GRANT pg_read_all_data TO regress_priv_user6; + GRANT pg_write_all_data TO regress_priv_user7; + GRANT pg_read_all_settings TO regress_priv_user8 WITH ADMIN OPTION; +@@ -145,8 +145,8 @@ REVOKE pg_read_all_settings FROM regress_priv_user8; + DROP USER regress_priv_user10; + DROP USER regress_priv_user9; + DROP USER regress_priv_user8; +-CREATE GROUP regress_priv_group1; +-CREATE GROUP regress_priv_group2 WITH ADMIN regress_priv_user1 USER regress_priv_user2; ++CREATE GROUP regress_priv_group1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE GROUP regress_priv_group2 WITH ADMIN regress_priv_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER USER regress_priv_user2; + ALTER GROUP regress_priv_group1 ADD USER regress_priv_user4; + GRANT regress_priv_group2 TO regress_priv_user2 GRANTED BY regress_priv_user1; + SET SESSION AUTHORIZATION regress_priv_user1; +@@ -172,12 +172,16 @@ GRANT regress_priv_role TO regress_priv_user1 WITH ADMIN OPTION GRANTED BY regre + ERROR: permission denied to grant privileges as role "regress_priv_role" + DETAIL: The grantor must have the ADMIN option on role "regress_priv_role". + GRANT regress_priv_role TO regress_priv_user1 WITH ADMIN OPTION GRANTED BY CURRENT_ROLE; ++ERROR: permission denied to grant privileges as role "neondb_owner" ++DETAIL: The grantor must have the ADMIN option on role "regress_priv_role". 
+ REVOKE ADMIN OPTION FOR regress_priv_role FROM regress_priv_user1 GRANTED BY foo; -- error + ERROR: role "foo" does not exist + REVOKE ADMIN OPTION FOR regress_priv_role FROM regress_priv_user1 GRANTED BY regress_priv_user2; -- warning, noop + WARNING: role "regress_priv_user1" has not been granted membership in role "regress_priv_role" by role "regress_priv_user2" + REVOKE ADMIN OPTION FOR regress_priv_role FROM regress_priv_user1 GRANTED BY CURRENT_USER; ++WARNING: role "regress_priv_user1" has not been granted membership in role "regress_priv_role" by role "neondb_owner" + REVOKE regress_priv_role FROM regress_priv_user1 GRANTED BY CURRENT_ROLE; ++WARNING: role "regress_priv_user1" has not been granted membership in role "regress_priv_role" by role "neondb_owner" + DROP ROLE regress_priv_role; + SET SESSION AUTHORIZATION regress_priv_user1; + SELECT session_user, current_user; +@@ -1709,7 +1713,7 @@ SELECT has_table_privilege('regress_priv_user1', 'atest4', 'SELECT WITH GRANT OP + + -- security-restricted operations + \c - +-CREATE ROLE regress_sro_user; ++CREATE ROLE regress_sro_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- Check that index expressions and predicates are run as the table's owner + -- A dummy index function checking current_user + CREATE FUNCTION sro_ifun(int) RETURNS int AS $$ +@@ -2601,8 +2605,8 @@ drop cascades to function testns.priv_testagg(integer) + drop cascades to function testns.priv_testproc(integer) + -- Change owner of the schema & and rename of new schema owner + \c - +-CREATE ROLE regress_schemauser1 superuser login; +-CREATE ROLE regress_schemauser2 superuser login; ++CREATE ROLE regress_schemauser1 superuser login PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_schemauser2 superuser login PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION ROLE regress_schemauser1; + CREATE SCHEMA testns; + SELECT nspname, rolname FROM pg_namespace, pg_roles WHERE pg_namespace.nspname = 'testns' AND pg_namespace.nspowner = 
pg_roles.oid; +@@ -2725,7 +2729,7 @@ DROP USER regress_priv_user7; + DROP USER regress_priv_user8; -- does not exist + ERROR: role "regress_priv_user8" does not exist + -- permissions with LOCK TABLE +-CREATE USER regress_locktable_user; ++CREATE USER regress_locktable_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE TABLE lock_table (a int); + -- LOCK TABLE and SELECT permission + GRANT SELECT ON lock_table TO regress_locktable_user; +@@ -2807,7 +2811,7 @@ DROP USER regress_locktable_user; + -- pg_backend_memory_contexts. + -- switch to superuser + \c - +-CREATE ROLE regress_readallstats; ++CREATE ROLE regress_readallstats PASSWORD NEON_PASSWORD_PLACEHOLDER; + SELECT has_table_privilege('regress_readallstats','pg_backend_memory_contexts','SELECT'); -- no + has_table_privilege + --------------------- +@@ -2851,10 +2855,10 @@ RESET ROLE; + -- clean up + DROP ROLE regress_readallstats; + -- test role grantor machinery +-CREATE ROLE regress_group; +-CREATE ROLE regress_group_direct_manager; +-CREATE ROLE regress_group_indirect_manager; +-CREATE ROLE regress_group_member; ++CREATE ROLE regress_group PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_group_direct_manager PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_group_indirect_manager PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_group_member PASSWORD NEON_PASSWORD_PLACEHOLDER; + GRANT regress_group TO regress_group_direct_manager WITH INHERIT FALSE, ADMIN TRUE; + GRANT regress_group_direct_manager TO regress_group_indirect_manager; + SET SESSION AUTHORIZATION regress_group_direct_manager; +@@ -2883,9 +2887,9 @@ DROP ROLE regress_group_direct_manager; + DROP ROLE regress_group_indirect_manager; + DROP ROLE regress_group_member; + -- test SET and INHERIT options with object ownership changes +-CREATE ROLE regress_roleoption_protagonist; +-CREATE ROLE regress_roleoption_donor; +-CREATE ROLE regress_roleoption_recipient; ++CREATE ROLE regress_roleoption_protagonist PASSWORD 
NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_roleoption_donor PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_roleoption_recipient PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE SCHEMA regress_roleoption; + GRANT CREATE, USAGE ON SCHEMA regress_roleoption TO PUBLIC; + GRANT regress_roleoption_donor TO regress_roleoption_protagonist WITH INHERIT TRUE, SET FALSE; +diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out +index 7cd0c27cca..d7a124ed68 100644 +--- a/src/test/regress/expected/psql.out ++++ b/src/test/regress/expected/psql.out +@@ -2857,7 +2857,7 @@ Type | func + -- check conditional am display + \pset expanded off + CREATE SCHEMA tableam_display; +-CREATE ROLE regress_display_role; ++CREATE ROLE regress_display_role PASSWORD NEON_PASSWORD_PLACEHOLDER; + ALTER SCHEMA tableam_display OWNER TO regress_display_role; + SET search_path TO tableam_display; + CREATE ACCESS METHOD heap_psql TYPE TABLE HANDLER heap_tableam_handler; +@@ -4808,7 +4808,7 @@ last error message: division by zero + last error code: 22012 + \unset FETCH_COUNT + create schema testpart; +-create role regress_partitioning_role; ++create role regress_partitioning_role PASSWORD NEON_PASSWORD_PLACEHOLDER; + alter schema testpart owner to regress_partitioning_role; + set role to regress_partitioning_role; + -- run test inside own schema and hide other partitions +@@ -5260,7 +5260,7 @@ reset work_mem; + + -- check \df+ + -- we have to use functions with a predictable owner name, so make a role +-create role regress_psql_user superuser; ++create role regress_psql_user superuser PASSWORD NEON_PASSWORD_PLACEHOLDER; + begin; + set session authorization regress_psql_user; + create function psql_df_internal (float8) +@@ -5544,11 +5544,14 @@ CREATE TEMPORARY TABLE reload_output( + line text + ); + SELECT 1 AS a \g :g_out_file +-COPY reload_output(line) FROM :'g_out_file'; ++\set command '\\COPY reload_output(line) FROM ' :'g_out_file'; ++:command + SELECT 2 AS 
b\; SELECT 3 AS c\; SELECT 4 AS d \g :g_out_file +-COPY reload_output(line) FROM :'g_out_file'; ++\set command '\\COPY reload_output(line) FROM ' :'g_out_file'; ++:command + COPY (SELECT 'foo') TO STDOUT \; COPY (SELECT 'bar') TO STDOUT \g :g_out_file +-COPY reload_output(line) FROM :'g_out_file'; ++\set command '\\COPY reload_output(line) FROM ' :'g_out_file'; ++:command + SELECT line FROM reload_output ORDER BY lineno; + line + --------- +@@ -5587,13 +5590,15 @@ SELECT 1 AS a\; SELECT 2 AS b\; SELECT 3 AS c; + -- COPY TO file + -- The data goes to :g_out_file and the status to :o_out_file + \set QUIET false +-COPY (SELECT unique1 FROM onek ORDER BY unique1 LIMIT 10) TO :'g_out_file'; ++\set command '\\COPY (SELECT unique1 FROM onek ORDER BY unique1 LIMIT 10) TO ' :'g_out_file'; ++:command + -- DML command status + UPDATE onek SET unique1 = unique1 WHERE false; + \set QUIET true + \o + -- Check the contents of the files generated. +-COPY reload_output(line) FROM :'g_out_file'; ++\set command '\\COPY reload_output(line) FROM ' :'g_out_file'; ++:command + SELECT line FROM reload_output ORDER BY lineno; + line + ------ +@@ -5610,7 +5615,8 @@ SELECT line FROM reload_output ORDER BY lineno; + (10 rows) + + TRUNCATE TABLE reload_output; +-COPY reload_output(line) FROM :'o_out_file'; ++\set command '\\COPY reload_output(line) FROM ' :'o_out_file'; ++:command + SELECT line FROM reload_output ORDER BY lineno; + line + ---------- +@@ -5647,7 +5653,8 @@ COPY (SELECT 'foo1') TO STDOUT \; COPY (SELECT 'bar1') TO STDOUT; + COPY (SELECT 'foo2') TO STDOUT \; COPY (SELECT 'bar2') TO STDOUT \g :g_out_file + \o + -- Check the contents of the files generated. 
+-COPY reload_output(line) FROM :'g_out_file'; ++\set command '\\COPY reload_output(line) FROM ' :'g_out_file'; ++:command + SELECT line FROM reload_output ORDER BY lineno; + line + ------ +@@ -5656,7 +5663,8 @@ SELECT line FROM reload_output ORDER BY lineno; + (2 rows) + + TRUNCATE TABLE reload_output; +-COPY reload_output(line) FROM :'o_out_file'; ++\set command '\\COPY reload_output(line) FROM ' :'o_out_file'; ++:command + SELECT line FROM reload_output ORDER BY lineno; + line + ------ +@@ -6619,10 +6627,10 @@ cross-database references are not implemented: "no.such.database"."no.such.schem + \dX "no.such.database"."no.such.schema"."no.such.extended.statistics" + cross-database references are not implemented: "no.such.database"."no.such.schema"."no.such.extended.statistics" + -- check \drg and \du +-CREATE ROLE regress_du_role0; +-CREATE ROLE regress_du_role1; +-CREATE ROLE regress_du_role2; +-CREATE ROLE regress_du_admin; ++CREATE ROLE regress_du_role0 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_du_role1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_du_role2 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_du_admin PASSWORD NEON_PASSWORD_PLACEHOLDER; + GRANT regress_du_role0 TO regress_du_admin WITH ADMIN TRUE; + GRANT regress_du_role1 TO regress_du_admin WITH ADMIN TRUE; + GRANT regress_du_role2 TO regress_du_admin WITH ADMIN TRUE; +diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out +index 69dc6cfd85..68390cc18a 100644 +--- a/src/test/regress/expected/publication.out ++++ b/src/test/regress/expected/publication.out +@@ -1,9 +1,9 @@ + -- + -- PUBLICATION + -- +-CREATE ROLE regress_publication_user LOGIN SUPERUSER; +-CREATE ROLE regress_publication_user2; +-CREATE ROLE regress_publication_user_dummy LOGIN NOSUPERUSER; ++CREATE ROLE regress_publication_user LOGIN SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_publication_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER; 
++CREATE ROLE regress_publication_user_dummy LOGIN NOSUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION AUTHORIZATION 'regress_publication_user'; + -- suppress warning that depends on wal_level + SET client_min_messages = 'ERROR'; +@@ -1211,7 +1211,7 @@ ALTER PUBLICATION testpub2 ADD TABLE testpub_tbl1; -- ok + DROP PUBLICATION testpub2; + DROP PUBLICATION testpub3; + SET ROLE regress_publication_user; +-CREATE ROLE regress_publication_user3; ++CREATE ROLE regress_publication_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER; + GRANT regress_publication_user2 TO regress_publication_user3; + SET client_min_messages = 'ERROR'; + CREATE PUBLICATION testpub4 FOR TABLES IN SCHEMA pub_test; +diff --git a/src/test/regress/expected/regproc.out b/src/test/regress/expected/regproc.out +index a9420850b8..bd3b5f312d 100644 +--- a/src/test/regress/expected/regproc.out ++++ b/src/test/regress/expected/regproc.out +@@ -2,7 +2,7 @@ + -- regproc + -- + /* If objects exist, return oids */ +-CREATE ROLE regress_regrole_test; ++CREATE ROLE regress_regrole_test PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- without schemaname + SELECT regoper('||/'); + regoper +diff --git a/src/test/regress/expected/roleattributes.out b/src/test/regress/expected/roleattributes.out +index 5e6969b173..2c4d52237f 100644 +--- a/src/test/regress/expected/roleattributes.out ++++ b/src/test/regress/expected/roleattributes.out +@@ -1,233 +1,233 @@ + -- default for superuser is false +-CREATE ROLE regress_test_def_superuser; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_superuser'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil 
+-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_def_superuser | f | t | f | f | f | f | f | -1 | | ++CREATE ROLE regress_test_def_superuser PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_superuser'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_def_superuser | f | t | f | f | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + +-CREATE ROLE regress_test_superuser WITH SUPERUSER; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_superuser | t | t | f | f | f | f | f | -1 | | ++CREATE ROLE regress_test_superuser WITH SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, 
regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_superuser | t | t | f | f | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + ALTER ROLE regress_test_superuser WITH NOSUPERUSER; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_superuser | f | t | f | f | f | f | f | -1 | | ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- 
++ regress_test_superuser | f | t | f | f | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + ALTER ROLE regress_test_superuser WITH SUPERUSER; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_superuser | t | t | f | f | f | f | f | -1 | | ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_superuser | t | t | f | f | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + -- default for inherit is true +-CREATE ROLE regress_test_def_inherit; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_inherit'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil 
+---------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_def_inherit | f | t | f | f | f | f | f | -1 | | ++CREATE ROLE regress_test_def_inherit PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_inherit'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++--------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_def_inherit | f | t | f | f | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + +-CREATE ROLE regress_test_inherit WITH NOINHERIT; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +-----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_inherit | f | f | f | f | f | f | f | -1 | | ++CREATE ROLE regress_test_inherit WITH NOINHERIT PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, 
'(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_inherit | f | f | f | f | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + ALTER ROLE regress_test_inherit WITH INHERIT; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +-----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_inherit | f | t | f | f | f | f | f | -1 | | ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_inherit | f | t | f | f | f | f | 
f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + ALTER ROLE regress_test_inherit WITH NOINHERIT; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +-----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_inherit | f | f | f | f | f | f | f | -1 | | ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_inherit | f | f | f | f | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + -- default for create role is false +-CREATE ROLE regress_test_def_createrole; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_createrole'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil 
+------------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_def_createrole | f | t | f | f | f | f | f | -1 | | ++CREATE ROLE regress_test_def_createrole PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_createrole'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_def_createrole | f | t | f | f | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + +-CREATE ROLE regress_test_createrole WITH CREATEROLE; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +--------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_createrole | f | t | t | f | f | f | f | -1 | | ++CREATE ROLE regress_test_createrole WITH CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, 
regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_createrole | f | t | t | f | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + ALTER ROLE regress_test_createrole WITH NOCREATEROLE; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +--------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_createrole | f | t | f | f | f | f | f | -1 | | ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil 
++-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_createrole | f | t | f | f | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + ALTER ROLE regress_test_createrole WITH CREATEROLE; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +--------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_createrole | f | t | t | f | f | f | f | -1 | | ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_createrole | f | t | t | f | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + -- default for create database is false +-CREATE ROLE regress_test_def_createdb; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM 
pg_authid WHERE rolname = 'regress_test_def_createdb'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_def_createdb | f | t | f | f | f | f | f | -1 | | ++CREATE ROLE regress_test_def_createdb PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_createdb'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++---------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_def_createdb | f | t | f | f | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + +-CREATE ROLE regress_test_createdb WITH CREATEDB; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_createdb | f | t | f | t | f | f | f | -1 | | ++CREATE ROLE 
regress_test_createdb WITH CREATEDB PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++-----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_createdb | f | t | f | t | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + ALTER ROLE regress_test_createdb WITH NOCREATEDB; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_createdb | f | t | f | f | f | f | f | -1 | | ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil 
++-----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_createdb | f | t | f | f | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + ALTER ROLE regress_test_createdb WITH CREATEDB; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_createdb | f | t | f | t | f | f | f | -1 | | ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++-----------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_createdb | f | t | f | t | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + -- default for can login is false for role +-CREATE ROLE regress_test_def_role_canlogin; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE 
rolname = 'regress_test_def_role_canlogin'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +---------------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_def_role_canlogin | f | t | f | f | f | f | f | -1 | | ++CREATE ROLE regress_test_def_role_canlogin PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_role_canlogin'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++--------------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_def_role_canlogin | f | t | f | f | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + +-CREATE ROLE regress_test_role_canlogin WITH LOGIN; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_role_canlogin | f | t | f | f | t | f | f | 
-1 | | ++CREATE ROLE regress_test_role_canlogin WITH LOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_role_canlogin | f | t | f | f | t | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + ALTER ROLE regress_test_role_canlogin WITH NOLOGIN; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_role_canlogin | f | t | f | f | f | f | f | -1 | | ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | 
rolconnlimit | regexp_replace | rolvaliduntil ++----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_role_canlogin | f | t | f | f | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + ALTER ROLE regress_test_role_canlogin WITH LOGIN; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_role_canlogin | f | t | f | f | t | f | f | -1 | | ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_role_canlogin | f | t | f | f | t | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + -- default for can login is true for user +-CREATE USER regress_test_def_user_canlogin; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, 
rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_user_canlogin'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +---------------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_def_user_canlogin | f | t | f | f | t | f | f | -1 | | ++CREATE USER regress_test_def_user_canlogin PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_user_canlogin'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++--------------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_def_user_canlogin | f | t | f | f | t | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + +-CREATE USER regress_test_user_canlogin WITH NOLOGIN; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil 
+-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_user_canlogin | f | t | f | f | f | f | f | -1 | | ++CREATE USER regress_test_user_canlogin WITH NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_user_canlogin | f | t | f | f | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + ALTER USER regress_test_user_canlogin WITH LOGIN; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_user_canlogin | f | t | f | f | t | f | f | -1 | | ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, 
'(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_user_canlogin | f | t | f | f | t | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + ALTER USER regress_test_user_canlogin WITH NOLOGIN; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_user_canlogin | f | t | f | f | f | f | f | -1 | | ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil 
++----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_user_canlogin | f | t | f | f | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + -- default for replication is false +-CREATE ROLE regress_test_def_replication; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_replication'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +-------------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_def_replication | f | t | f | f | f | f | f | -1 | | ++CREATE ROLE regress_test_def_replication PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_replication'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++------------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_def_replication | f | t | f | f | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + +-CREATE ROLE regress_test_replication WITH REPLICATION; +-SELECT rolname, rolsuper, rolinherit, 
rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +---------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_replication | f | t | f | f | f | t | f | -1 | | ++CREATE ROLE regress_test_replication WITH REPLICATION PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++--------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_replication | f | t | f | f | f | t | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + ALTER ROLE regress_test_replication WITH NOREPLICATION; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil 
+---------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_replication | f | t | f | f | f | f | f | -1 | | ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++--------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_replication | f | t | f | f | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + ALTER ROLE regress_test_replication WITH REPLICATION; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +---------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_replication | f | t | f | f | f | t | f | -1 | | ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 
'regress_test_replication'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++--------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_replication | f | t | f | f | f | t | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + -- default for bypassrls is false +-CREATE ROLE regress_test_def_bypassrls; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_bypassrls'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +-----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_def_bypassrls | f | t | f | f | f | f | f | -1 | | ++CREATE ROLE regress_test_def_bypassrls PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_bypassrls'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++----------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_def_bypassrls | f | t | 
f | f | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + +-CREATE ROLE regress_test_bypassrls WITH BYPASSRLS; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_bypassrls | f | t | f | f | f | f | t | -1 | | ++CREATE ROLE regress_test_bypassrls WITH BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_bypassrls | f | t | f | f | f | f | t | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + ALTER ROLE regress_test_bypassrls WITH NOBYPASSRLS; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | 
rolvaliduntil +-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_bypassrls | f | t | f | f | f | f | f | -1 | | ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_bypassrls | f | t | f | f | f | f | f | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + ALTER ROLE regress_test_bypassrls WITH BYPASSRLS; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls'; +- rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | rolpassword | rolvaliduntil +-------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+-------------+--------------- +- regress_test_bypassrls | f | t | f | f | f | f | t | -1 | | ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 
'regress_test_bypassrls'; ++ rolname | rolsuper | rolinherit | rolcreaterole | rolcreatedb | rolcanlogin | rolreplication | rolbypassrls | rolconnlimit | regexp_replace | rolvaliduntil ++------------------------+----------+------------+---------------+-------------+-------------+----------------+--------------+--------------+---------------------------------------------------+--------------- ++ regress_test_bypassrls | f | t | f | f | f | f | t | -1 | SCRAM-SHA-256$4096:$: | + (1 row) + + -- clean up roles +diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out +index 97ca9bf72c..b2a7a6f710 100644 +--- a/src/test/regress/expected/rowsecurity.out ++++ b/src/test/regress/expected/rowsecurity.out +@@ -14,13 +14,13 @@ DROP ROLE IF EXISTS regress_rls_group2; + DROP SCHEMA IF EXISTS regress_rls_schema CASCADE; + RESET client_min_messages; + -- initial setup +-CREATE USER regress_rls_alice NOLOGIN; +-CREATE USER regress_rls_bob NOLOGIN; +-CREATE USER regress_rls_carol NOLOGIN; +-CREATE USER regress_rls_dave NOLOGIN; +-CREATE USER regress_rls_exempt_user BYPASSRLS NOLOGIN; +-CREATE ROLE regress_rls_group1 NOLOGIN; +-CREATE ROLE regress_rls_group2 NOLOGIN; ++CREATE USER regress_rls_alice NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_rls_bob NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_rls_carol NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_rls_dave NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_rls_exempt_user BYPASSRLS NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_rls_group1 NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_rls_group2 NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER; + GRANT regress_rls_group1 TO regress_rls_bob; + GRANT regress_rls_group2 TO regress_rls_carol; + CREATE SCHEMA regress_rls_schema; +@@ -4352,8 +4352,8 @@ SELECT count(*) = 0 FROM pg_depend + + -- DROP OWNED BY testing + RESET SESSION 
AUTHORIZATION; +-CREATE ROLE regress_rls_dob_role1; +-CREATE ROLE regress_rls_dob_role2; ++CREATE ROLE regress_rls_dob_role1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_rls_dob_role2 PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE TABLE dob_t1 (c1 int); + CREATE TABLE dob_t2 (c1 int) PARTITION BY RANGE (c1); + CREATE POLICY p1 ON dob_t1 TO regress_rls_dob_role1 USING (true); +diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out +index 09a255649b..15895f0c53 100644 +--- a/src/test/regress/expected/rules.out ++++ b/src/test/regress/expected/rules.out +@@ -3708,7 +3708,7 @@ DROP TABLE ruletest2; + -- Test non-SELECT rule on security invoker view. + -- Should use view owner's permissions. + -- +-CREATE USER regress_rule_user1; ++CREATE USER regress_rule_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE TABLE ruletest_t1 (x int); + CREATE TABLE ruletest_t2 (x int); + CREATE VIEW ruletest_v1 WITH (security_invoker=true) AS +diff --git a/src/test/regress/expected/security_label.out b/src/test/regress/expected/security_label.out +index a8e01a6220..5a9cef4ede 100644 +--- a/src/test/regress/expected/security_label.out ++++ b/src/test/regress/expected/security_label.out +@@ -6,8 +6,8 @@ SET client_min_messages TO 'warning'; + DROP ROLE IF EXISTS regress_seclabel_user1; + DROP ROLE IF EXISTS regress_seclabel_user2; + RESET client_min_messages; +-CREATE USER regress_seclabel_user1 WITH CREATEROLE; +-CREATE USER regress_seclabel_user2; ++CREATE USER regress_seclabel_user1 WITH CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_seclabel_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE TABLE seclabel_tbl1 (a int, b text); + CREATE TABLE seclabel_tbl2 (x int, y text); + CREATE VIEW seclabel_view1 AS SELECT * FROM seclabel_tbl2; +@@ -19,21 +19,21 @@ ALTER TABLE seclabel_tbl2 OWNER TO regress_seclabel_user2; + -- Test of SECURITY LABEL statement without a plugin + -- + SECURITY LABEL ON TABLE seclabel_tbl1 IS 
'classified'; -- fail +-ERROR: no security label providers have been loaded ++ERROR: must specify provider when multiple security label providers have been loaded + SECURITY LABEL FOR 'dummy' ON TABLE seclabel_tbl1 IS 'classified'; -- fail + ERROR: security label provider "dummy" is not loaded + SECURITY LABEL ON TABLE seclabel_tbl1 IS '...invalid label...'; -- fail +-ERROR: no security label providers have been loaded ++ERROR: must specify provider when multiple security label providers have been loaded + SECURITY LABEL ON TABLE seclabel_tbl3 IS 'unclassified'; -- fail +-ERROR: no security label providers have been loaded ++ERROR: must specify provider when multiple security label providers have been loaded + SECURITY LABEL ON ROLE regress_seclabel_user1 IS 'classified'; -- fail +-ERROR: no security label providers have been loaded ++ERROR: must specify provider when multiple security label providers have been loaded + SECURITY LABEL FOR 'dummy' ON ROLE regress_seclabel_user1 IS 'classified'; -- fail + ERROR: security label provider "dummy" is not loaded + SECURITY LABEL ON ROLE regress_seclabel_user1 IS '...invalid label...'; -- fail +-ERROR: no security label providers have been loaded ++ERROR: must specify provider when multiple security label providers have been loaded + SECURITY LABEL ON ROLE regress_seclabel_user3 IS 'unclassified'; -- fail +-ERROR: no security label providers have been loaded ++ERROR: must specify provider when multiple security label providers have been loaded + -- clean up objects + DROP FUNCTION seclabel_four(); + DROP DOMAIN seclabel_domain; +diff --git a/src/test/regress/expected/select_into.out b/src/test/regress/expected/select_into.out +index b79fe9a1c0..e29fab88ab 100644 +--- a/src/test/regress/expected/select_into.out ++++ b/src/test/regress/expected/select_into.out +@@ -15,7 +15,7 @@ DROP TABLE sitmp1; + -- SELECT INTO and INSERT permission, if owner is not allowed to insert. 
+ -- + CREATE SCHEMA selinto_schema; +-CREATE USER regress_selinto_user; ++CREATE USER regress_selinto_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + ALTER DEFAULT PRIVILEGES FOR ROLE regress_selinto_user + REVOKE INSERT ON TABLES FROM regress_selinto_user; + GRANT ALL ON SCHEMA selinto_schema TO public; +diff --git a/src/test/regress/expected/select_views.out b/src/test/regress/expected/select_views.out +index 1aeed8452b..7d9427d070 100644 +--- a/src/test/regress/expected/select_views.out ++++ b/src/test/regress/expected/select_views.out +@@ -1250,7 +1250,7 @@ SELECT * FROM toyemp WHERE name = 'sharon'; + -- + -- Test for Leaky view scenario + -- +-CREATE ROLE regress_alice; ++CREATE ROLE regress_alice PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE FUNCTION f_leak (text) + RETURNS bool LANGUAGE 'plpgsql' COST 0.0000001 + AS 'BEGIN RAISE NOTICE ''f_leak => %'', $1; RETURN true; END'; +diff --git a/src/test/regress/expected/sequence.out b/src/test/regress/expected/sequence.out +index f02f020542..c9e0fda350 100644 +--- a/src/test/regress/expected/sequence.out ++++ b/src/test/regress/expected/sequence.out +@@ -22,7 +22,7 @@ CREATE SEQUENCE sequence_testx OWNED BY pg_class_oid_index.oid; -- not a table + ERROR: sequence cannot be owned by relation "pg_class_oid_index" + DETAIL: This operation is not supported for indexes. 
+ CREATE SEQUENCE sequence_testx OWNED BY pg_class.relname; -- not same schema +-ERROR: sequence must be in same schema as table it is linked to ++ERROR: sequence must have same owner as table it is linked to + CREATE TABLE sequence_test_table (a int); + CREATE SEQUENCE sequence_testx OWNED BY sequence_test_table.b; -- wrong column + ERROR: column "b" of relation "sequence_test_table" does not exist +@@ -639,7 +639,7 @@ SELECT setval('sequence_test2', 1); -- error + ERROR: cannot execute setval() in a read-only transaction + ROLLBACK; + -- privileges tests +-CREATE USER regress_seq_user; ++CREATE USER regress_seq_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- nextval + BEGIN; + SET LOCAL SESSION AUTHORIZATION regress_seq_user; +diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out +index 94187e59cf..72346e2c71 100644 +--- a/src/test/regress/expected/stats.out ++++ b/src/test/regress/expected/stats.out +@@ -1283,37 +1283,6 @@ SELECT current_setting('fsync') = 'off' + t + (1 row) + +--- Change the tablespace so that the table is rewritten directly, then SELECT +--- from it to cause it to be read back into shared buffers. +-SELECT sum(reads) AS io_sum_shared_before_reads +- FROM pg_stat_io WHERE context = 'normal' AND object = 'relation' \gset +--- Do this in a transaction to prevent spurious failures due to concurrent accesses to our newly +--- rewritten table, e.g. by autovacuum. +-BEGIN; +-ALTER TABLE test_io_shared SET TABLESPACE regress_tblspace; +--- SELECT from the table so that the data is read into shared buffers and +--- context 'normal', object 'relation' reads are counted. 
+-SELECT COUNT(*) FROM test_io_shared; +- count +-------- +- 100 +-(1 row) +- +-COMMIT; +-SELECT pg_stat_force_next_flush(); +- pg_stat_force_next_flush +--------------------------- +- +-(1 row) +- +-SELECT sum(reads) AS io_sum_shared_after_reads +- FROM pg_stat_io WHERE context = 'normal' AND object = 'relation' \gset +-SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads; +- ?column? +----------- +- t +-(1 row) +- + SELECT sum(hits) AS io_sum_shared_before_hits + FROM pg_stat_io WHERE context = 'normal' AND object = 'relation' \gset + -- Select from the table again to count hits. +@@ -1415,6 +1384,7 @@ SELECT :io_sum_local_after_evictions > :io_sum_local_before_evictions, + -- local buffers, exercising a different codepath than standard local buffer + -- writes. + ALTER TABLE test_io_local SET TABLESPACE regress_tblspace; ++ERROR: tablespace "regress_tblspace" does not exist + SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush + -------------------------- +@@ -1426,7 +1396,7 @@ SELECT sum(writes) AS io_sum_local_new_tblspc_writes + SELECT :io_sum_local_new_tblspc_writes > :io_sum_local_after_writes; + ?column? 
+ ---------- +- t ++ f + (1 row) + + RESET temp_buffers; +diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out +index b4c85613de..d32a9a69ad 100644 +--- a/src/test/regress/expected/stats_ext.out ++++ b/src/test/regress/expected/stats_ext.out +@@ -70,7 +70,7 @@ DROP TABLE ext_stats_test; + CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER); + CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1; + COMMENT ON STATISTICS ab1_a_b_stats IS 'new comment'; +-CREATE ROLE regress_stats_ext; ++CREATE ROLE regress_stats_ext PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION AUTHORIZATION regress_stats_ext; + COMMENT ON STATISTICS ab1_a_b_stats IS 'changed comment'; + ERROR: must be owner of statistics object ab1_a_b_stats +@@ -3214,7 +3214,7 @@ set search_path to public, stts_s1; + stts_s1 | stts_foo | col1, col2 FROM stts_t3 | defined | defined | defined + (10 rows) + +-create role regress_stats_ext nosuperuser; ++create role regress_stats_ext nosuperuser PASSWORD NEON_PASSWORD_PLACEHOLDER; + set role regress_stats_ext; + \dX + List of extended statistics +@@ -3237,7 +3237,7 @@ drop schema stts_s1, stts_s2 cascade; + drop user regress_stats_ext; + reset search_path; + -- User with no access +-CREATE USER regress_stats_user1; ++CREATE USER regress_stats_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER; + GRANT USAGE ON SCHEMA tststats TO regress_stats_user1; + SET SESSION AUTHORIZATION regress_stats_user1; + SELECT * FROM tststats.priv_test_tbl; -- Permission denied +diff --git a/src/test/regress/expected/subscription.out b/src/test/regress/expected/subscription.out +index b15eddbff3..e9ba4568eb 100644 +--- a/src/test/regress/expected/subscription.out ++++ b/src/test/regress/expected/subscription.out +@@ -1,10 +1,10 @@ + -- + -- SUBSCRIPTION + -- +-CREATE ROLE regress_subscription_user LOGIN SUPERUSER; +-CREATE ROLE regress_subscription_user2; +-CREATE ROLE regress_subscription_user3 IN ROLE pg_create_subscription; +-CREATE ROLE 
regress_subscription_user_dummy LOGIN NOSUPERUSER; ++CREATE ROLE regress_subscription_user LOGIN SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_subscription_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_subscription_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_create_subscription; ++CREATE ROLE regress_subscription_user_dummy LOGIN NOSUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION AUTHORIZATION 'regress_subscription_user'; + -- fail - no publications + CREATE SUBSCRIPTION regress_testsub CONNECTION 'foo'; +diff --git a/src/test/regress/expected/test_setup.out b/src/test/regress/expected/test_setup.out +index 5d9e6bf12b..c5fddfdca6 100644 +--- a/src/test/regress/expected/test_setup.out ++++ b/src/test/regress/expected/test_setup.out +@@ -21,6 +21,7 @@ GRANT ALL ON SCHEMA public TO public; + -- Create a tablespace we can use in tests. + SET allow_in_place_tablespaces = true; + CREATE TABLESPACE regress_tblspace LOCATION ''; ++ERROR: CREATE TABLESPACE is not supported on Neon + -- + -- These tables have traditionally been referenced by many tests, + -- so create and populate them. Insert only non-error values here. 
+@@ -111,7 +112,8 @@ CREATE TABLE onek ( + string4 name + ); + \set filename :abs_srcdir '/data/onek.data' +-COPY onek FROM :'filename'; ++\set command '\\copy onek FROM ' :'filename'; ++:command + VACUUM ANALYZE onek; + CREATE TABLE onek2 AS SELECT * FROM onek; + VACUUM ANALYZE onek2; +@@ -134,7 +136,8 @@ CREATE TABLE tenk1 ( + string4 name + ); + \set filename :abs_srcdir '/data/tenk.data' +-COPY tenk1 FROM :'filename'; ++\set command '\\copy tenk1 FROM ' :'filename'; ++:command + VACUUM ANALYZE tenk1; + CREATE TABLE tenk2 AS SELECT * FROM tenk1; + VACUUM ANALYZE tenk2; +@@ -144,20 +147,23 @@ CREATE TABLE person ( + location point + ); + \set filename :abs_srcdir '/data/person.data' +-COPY person FROM :'filename'; ++\set command '\\copy person FROM ' :'filename'; ++:command + VACUUM ANALYZE person; + CREATE TABLE emp ( + salary int4, + manager name + ) INHERITS (person); + \set filename :abs_srcdir '/data/emp.data' +-COPY emp FROM :'filename'; ++\set command '\\copy emp FROM ' :'filename'; ++:command + VACUUM ANALYZE emp; + CREATE TABLE student ( + gpa float8 + ) INHERITS (person); + \set filename :abs_srcdir '/data/student.data' +-COPY student FROM :'filename'; ++\set command '\\copy student FROM ' :'filename'; ++:command + VACUUM ANALYZE student; + CREATE TABLE stud_emp ( + percent int4 +@@ -166,14 +172,16 @@ NOTICE: merging multiple inherited definitions of column "name" + NOTICE: merging multiple inherited definitions of column "age" + NOTICE: merging multiple inherited definitions of column "location" + \set filename :abs_srcdir '/data/stud_emp.data' +-COPY stud_emp FROM :'filename'; ++\set command '\\copy stud_emp FROM ' :'filename'; ++:command + VACUUM ANALYZE stud_emp; + CREATE TABLE road ( + name text, + thepath path + ); + \set filename :abs_srcdir '/data/streets.data' +-COPY road FROM :'filename'; ++\set command '\\copy road FROM ' :'filename'; ++:command + VACUUM ANALYZE road; + CREATE TABLE ihighway () INHERITS (road); + INSERT INTO ihighway +diff 
--git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out +index 9fad6c8b04..a1b8e82389 100644 +--- a/src/test/regress/expected/tsearch.out ++++ b/src/test/regress/expected/tsearch.out +@@ -63,7 +63,8 @@ CREATE TABLE test_tsvector( + a tsvector + ); + \set filename :abs_srcdir '/data/tsearch.data' +-COPY test_tsvector FROM :'filename'; ++\set command '\\copy test_tsvector FROM ' :'filename'; ++:command + ANALYZE test_tsvector; + -- test basic text search behavior without indexes, then with + SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh'; +diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out +index ba46c32029..eac3017bac 100644 +--- a/src/test/regress/expected/updatable_views.out ++++ b/src/test/regress/expected/updatable_views.out +@@ -999,9 +999,9 @@ NOTICE: drop cascades to 2 other objects + DETAIL: drop cascades to view rw_view1 + drop cascades to function rw_view1_aa(rw_view1) + -- permissions checks +-CREATE USER regress_view_user1; +-CREATE USER regress_view_user2; +-CREATE USER regress_view_user3; ++CREATE USER regress_view_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_view_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_view_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION AUTHORIZATION regress_view_user1; + CREATE TABLE base_tbl(a int, b text, c float); + INSERT INTO base_tbl VALUES (1, 'Row 1', 1.0); +@@ -3094,8 +3094,8 @@ DETAIL: View columns that are not columns of their base relation are not updata + drop view uv_iocu_view; + drop table uv_iocu_tab; + -- ON CONFLICT DO UPDATE permissions checks +-create user regress_view_user1; +-create user regress_view_user2; ++create user regress_view_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++create user regress_view_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER; + set session authorization regress_view_user1; + create table base_tbl(a int unique, b text, c float); + insert into base_tbl 
values (1,'xxx',1.0); +diff --git a/src/test/regress/expected/update.out b/src/test/regress/expected/update.out +index c809f88f54..d1d57852d4 100644 +--- a/src/test/regress/expected/update.out ++++ b/src/test/regress/expected/update.out +@@ -602,7 +602,7 @@ DROP FUNCTION func_parted_mod_b(); + -- RLS policies with update-row-movement + ----------------------------------------- + ALTER TABLE range_parted ENABLE ROW LEVEL SECURITY; +-CREATE USER regress_range_parted_user; ++CREATE USER regress_range_parted_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + GRANT ALL ON range_parted, mintab TO regress_range_parted_user; + CREATE POLICY seeall ON range_parted AS PERMISSIVE FOR SELECT USING (true); + CREATE POLICY policy_range_parted ON range_parted for UPDATE USING (true) WITH CHECK (c % 2 = 0); +diff --git a/src/test/regress/expected/vacuum.out b/src/test/regress/expected/vacuum.out +index 4aaf4f025d..40a339758a 100644 +--- a/src/test/regress/expected/vacuum.out ++++ b/src/test/regress/expected/vacuum.out +@@ -433,7 +433,7 @@ CREATE TABLE vacowned (a int); + CREATE TABLE vacowned_parted (a int) PARTITION BY LIST (a); + CREATE TABLE vacowned_part1 PARTITION OF vacowned_parted FOR VALUES IN (1); + CREATE TABLE vacowned_part2 PARTITION OF vacowned_parted FOR VALUES IN (2); +-CREATE ROLE regress_vacuum; ++CREATE ROLE regress_vacuum PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET ROLE regress_vacuum; + -- Simple table + VACUUM vacowned; +diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule +index 3d14bf4e4f..87f351b1d1 100644 +--- a/src/test/regress/parallel_schedule ++++ b/src/test/regress/parallel_schedule +@@ -130,4 +130,4 @@ test: fast_default + + # run tablespace test at the end because it drops the tablespace created during + # setup that other tests may use. 
+-test: tablespace ++#test: tablespace +diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql +index f51726e8ed..8854104eff 100644 +--- a/src/test/regress/sql/aggregates.sql ++++ b/src/test/regress/sql/aggregates.sql +@@ -15,7 +15,8 @@ CREATE TABLE aggtest ( + ); + + \set filename :abs_srcdir '/data/agg.data' +-COPY aggtest FROM :'filename'; ++\set command '\\copy aggtest FROM ' :'filename'; ++:command + + ANALYZE aggtest; + +diff --git a/src/test/regress/sql/alter_generic.sql b/src/test/regress/sql/alter_generic.sql +index de58d268d3..9d38df7f42 100644 +--- a/src/test/regress/sql/alter_generic.sql ++++ b/src/test/regress/sql/alter_generic.sql +@@ -22,9 +22,9 @@ DROP ROLE IF EXISTS regress_alter_generic_user3; + + RESET client_min_messages; + +-CREATE USER regress_alter_generic_user3; +-CREATE USER regress_alter_generic_user2; +-CREATE USER regress_alter_generic_user1 IN ROLE regress_alter_generic_user3; ++CREATE USER regress_alter_generic_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_alter_generic_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_alter_generic_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE regress_alter_generic_user3; + + CREATE SCHEMA alt_nsp1; + CREATE SCHEMA alt_nsp2; +@@ -316,7 +316,7 @@ DROP OPERATOR FAMILY alt_opf4 USING btree; + + -- Should fail. Need to be SUPERUSER to do ALTER OPERATOR FAMILY .. ADD / DROP + BEGIN TRANSACTION; +-CREATE ROLE regress_alter_generic_user5 NOSUPERUSER; ++CREATE ROLE regress_alter_generic_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER NOSUPERUSER; + CREATE OPERATOR FAMILY alt_opf5 USING btree; + SET ROLE regress_alter_generic_user5; + ALTER OPERATOR FAMILY alt_opf5 USING btree ADD OPERATOR 1 < (int4, int2), FUNCTION 1 btint42cmp(int4, int2); +@@ -326,7 +326,7 @@ ROLLBACK; + + -- Should fail. Need rights to namespace for ALTER OPERATOR FAMILY .. 
ADD / DROP + BEGIN TRANSACTION; +-CREATE ROLE regress_alter_generic_user6; ++CREATE ROLE regress_alter_generic_user6 PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE SCHEMA alt_nsp6; + REVOKE ALL ON SCHEMA alt_nsp6 FROM regress_alter_generic_user6; + CREATE OPERATOR FAMILY alt_nsp6.alt_opf6 USING btree; +diff --git a/src/test/regress/sql/alter_operator.sql b/src/test/regress/sql/alter_operator.sql +index fd40370165..ca8055e06d 100644 +--- a/src/test/regress/sql/alter_operator.sql ++++ b/src/test/regress/sql/alter_operator.sql +@@ -87,7 +87,7 @@ ALTER OPERATOR & (bit, bit) SET ("Restrict" = _int_contsel, "Join" = _int_contjo + -- + -- Test permission check. Must be owner to ALTER OPERATOR. + -- +-CREATE USER regress_alter_op_user; ++CREATE USER regress_alter_op_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION AUTHORIZATION regress_alter_op_user; + + ALTER OPERATOR === (boolean, boolean) SET (RESTRICT = NONE); +diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql +index d2845abc97..a0719b8d0e 100644 +--- a/src/test/regress/sql/alter_table.sql ++++ b/src/test/regress/sql/alter_table.sql +@@ -7,7 +7,7 @@ SET client_min_messages TO 'warning'; + DROP ROLE IF EXISTS regress_alter_table_user1; + RESET client_min_messages; + +-CREATE USER regress_alter_table_user1; ++CREATE USER regress_alter_table_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- + -- add attribute +@@ -2397,8 +2397,8 @@ DROP TABLE fail_part; + ALTER TABLE list_parted ATTACH PARTITION nonexistent FOR VALUES IN (1); + + -- check ownership of the source table +-CREATE ROLE regress_test_me; +-CREATE ROLE regress_test_not_me; ++CREATE ROLE regress_test_me PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_test_not_me PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE TABLE not_owned_by_me (LIKE list_parted); + ALTER TABLE not_owned_by_me OWNER TO regress_test_not_me; + SET SESSION AUTHORIZATION regress_test_me; +diff --git a/src/test/regress/sql/arrays.sql 
b/src/test/regress/sql/arrays.sql +index e414fa560d..79a75a0e57 100644 +--- a/src/test/regress/sql/arrays.sql ++++ b/src/test/regress/sql/arrays.sql +@@ -22,7 +22,8 @@ CREATE TABLE array_op_test ( + ); + + \set filename :abs_srcdir '/data/array.data' +-COPY array_op_test FROM :'filename'; ++\set command '\\copy array_op_test FROM ' :'filename'; ++:command + ANALYZE array_op_test; + + -- +diff --git a/src/test/regress/sql/btree_index.sql b/src/test/regress/sql/btree_index.sql +index 239f4a4755..f29d87bdff 100644 +--- a/src/test/regress/sql/btree_index.sql ++++ b/src/test/regress/sql/btree_index.sql +@@ -26,16 +26,20 @@ CREATE TABLE bt_f8_heap ( + ); + + \set filename :abs_srcdir '/data/desc.data' +-COPY bt_i4_heap FROM :'filename'; ++\set command '\\copy bt_i4_heap FROM ' :'filename'; ++:command + + \set filename :abs_srcdir '/data/hash.data' +-COPY bt_name_heap FROM :'filename'; ++\set command '\\copy bt_name_heap FROM ' :'filename'; ++:command + + \set filename :abs_srcdir '/data/desc.data' +-COPY bt_txt_heap FROM :'filename'; ++\set command '\\copy bt_txt_heap FROM ' :'filename'; ++:command + + \set filename :abs_srcdir '/data/hash.data' +-COPY bt_f8_heap FROM :'filename'; ++\set command '\\copy bt_f8_heap FROM ' :'filename'; ++:command + + ANALYZE bt_i4_heap; + ANALYZE bt_name_heap; +diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql +index 6cb9c926c0..5e689e4062 100644 +--- a/src/test/regress/sql/cluster.sql ++++ b/src/test/regress/sql/cluster.sql +@@ -108,7 +108,7 @@ WHERE pg_class.oid=indexrelid + CLUSTER pg_toast.pg_toast_826 USING pg_toast_826_index; + + -- Verify that clustering all tables does in fact cluster the right ones +-CREATE USER regress_clstr_user; ++CREATE USER regress_clstr_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE TABLE clstr_1 (a INT PRIMARY KEY); + CREATE TABLE clstr_2 (a INT PRIMARY KEY); + CREATE TABLE clstr_3 (a INT PRIMARY KEY); +@@ -233,7 +233,7 @@ DROP TABLE clstrpart; + CREATE TABLE ptnowner(i int 
unique) PARTITION BY LIST (i); + CREATE INDEX ptnowner_i_idx ON ptnowner(i); + CREATE TABLE ptnowner1 PARTITION OF ptnowner FOR VALUES IN (1); +-CREATE ROLE regress_ptnowner; ++CREATE ROLE regress_ptnowner PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE TABLE ptnowner2 PARTITION OF ptnowner FOR VALUES IN (2); + ALTER TABLE ptnowner1 OWNER TO regress_ptnowner; + ALTER TABLE ptnowner OWNER TO regress_ptnowner; +diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql +index 3db9e25913..c66d5aa2c2 100644 +--- a/src/test/regress/sql/collate.icu.utf8.sql ++++ b/src/test/regress/sql/collate.icu.utf8.sql +@@ -353,7 +353,7 @@ reset enable_seqscan; + + -- schema manipulation commands + +-CREATE ROLE regress_test_role; ++CREATE ROLE regress_test_role PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE SCHEMA test_schema; + + -- We need to do this this way to cope with varying names for encodings: +diff --git a/src/test/regress/sql/constraints.sql b/src/test/regress/sql/constraints.sql +index e3e3bea709..fa86ddc326 100644 +--- a/src/test/regress/sql/constraints.sql ++++ b/src/test/regress/sql/constraints.sql +@@ -243,12 +243,14 @@ CREATE TABLE COPY_TBL (x INT, y TEXT, z INT, + CHECK (x > 3 AND y <> 'check failed' AND x < 7 )); + + \set filename :abs_srcdir '/data/constro.data' +-COPY COPY_TBL FROM :'filename'; ++\set command '\\copy COPY_TBL FROM ' :'filename'; ++:command + + SELECT * FROM COPY_TBL; + + \set filename :abs_srcdir '/data/constrf.data' +-COPY COPY_TBL FROM :'filename'; ++\set command '\\copy COPY_TBL FROM ' :'filename'; ++:command + + SELECT * FROM COPY_TBL; + +@@ -599,7 +601,7 @@ DROP TABLE deferred_excl; + + -- Comments + -- Setup a low-level role to enforce non-superuser checks. 
+-CREATE ROLE regress_constraint_comments; ++CREATE ROLE regress_constraint_comments PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION AUTHORIZATION regress_constraint_comments; + + CREATE TABLE constraint_comments_tbl (a int CONSTRAINT the_constraint CHECK (a > 0)); +@@ -621,7 +623,7 @@ COMMENT ON CONSTRAINT the_constraint ON DOMAIN constraint_comments_dom IS NULL; + + -- unauthorized user + RESET SESSION AUTHORIZATION; +-CREATE ROLE regress_constraint_comments_noaccess; ++CREATE ROLE regress_constraint_comments_noaccess PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION AUTHORIZATION regress_constraint_comments_noaccess; + COMMENT ON CONSTRAINT the_constraint ON constraint_comments_tbl IS 'no, the comment'; + COMMENT ON CONSTRAINT the_constraint ON DOMAIN constraint_comments_dom IS 'no, another comment'; +diff --git a/src/test/regress/sql/conversion.sql b/src/test/regress/sql/conversion.sql +index 9a65fca91f..58431a3056 100644 +--- a/src/test/regress/sql/conversion.sql ++++ b/src/test/regress/sql/conversion.sql +@@ -12,7 +12,7 @@ CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, r + AS :'regresslib', 'test_enc_conversion' + LANGUAGE C STRICT; + +-CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE; ++CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION AUTHORIZATION regress_conversion_user; + CREATE CONVERSION myconv FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8; + -- +diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql +index 43d2e906dd..6c993d70f0 100644 +--- a/src/test/regress/sql/copy.sql ++++ b/src/test/regress/sql/copy.sql +@@ -20,11 +20,13 @@ insert into copytest values('Mac',E'abc\rdef',3); + insert into copytest values(E'esc\\ape',E'a\\r\\\r\\\n\\nb',4); + + \set filename :abs_builddir '/results/copytest.csv' +-copy copytest to :'filename' csv; ++\set command '\\copy copytest to ' :'filename' csv; ++:command + + create temp table 
copytest2 (like copytest); + +-copy copytest2 from :'filename' csv; ++\set command '\\copy copytest2 from ' :'filename' csv; ++:command + + select * from copytest except select * from copytest2; + +@@ -32,9 +34,11 @@ truncate copytest2; + + --- same test but with an escape char different from quote char + +-copy copytest to :'filename' csv quote '''' escape E'\\'; ++\set command '\\copy copytest to ' :'filename' ' csv quote ' '\'\'\'\'' ' escape ' 'E\'' '\\\\\''; ++:command + +-copy copytest2 from :'filename' csv quote '''' escape E'\\'; ++\set command '\\copy copytest2 from ' :'filename' ' csv quote ' '\'\'\'\'' ' escape ' 'E\'' '\\\\\''; ++:command + + select * from copytest except select * from copytest2; + +@@ -86,16 +90,19 @@ insert into parted_copytest select x,2,'Two' from generate_series(1001,1010) x; + insert into parted_copytest select x,1,'One' from generate_series(1011,1020) x; + + \set filename :abs_builddir '/results/parted_copytest.csv' +-copy (select * from parted_copytest order by a) to :'filename'; ++\set command '\\copy (select * from parted_copytest order by a) to ' :'filename'; ++:command + + truncate parted_copytest; + +-copy parted_copytest from :'filename'; ++\set command '\\copy parted_copytest from ' :'filename'; ++:command + + -- Ensure COPY FREEZE errors for partitioned tables. 
+ begin; + truncate parted_copytest; +-copy parted_copytest from :'filename' (freeze); ++\set command '\\copy parted_copytest from ' :'filename' (freeze); ++:command + rollback; + + select tableoid::regclass,count(*),sum(a) from parted_copytest +@@ -115,7 +122,8 @@ create trigger part_ins_trig + for each row + execute procedure part_ins_func(); + +-copy parted_copytest from :'filename'; ++\set command '\\copy parted_copytest from ' :'filename'; ++:command + + select tableoid::regclass,count(*),sum(a) from parted_copytest + group by tableoid order by tableoid::regclass::name; +@@ -124,7 +132,8 @@ truncate table parted_copytest; + create index on parted_copytest (b); + drop trigger part_ins_trig on parted_copytest_a2; + +-copy parted_copytest from stdin; ++\set command '\\copy parted_copytest from ' stdin; ++:command + 1 1 str1 + 2 2 str2 + \. +@@ -191,8 +200,8 @@ bill 20 (11,10) 1000 sharon + -- Generate COPY FROM report with FILE, with some excluded tuples. + truncate tab_progress_reporting; + \set filename :abs_srcdir '/data/emp.data' +-copy tab_progress_reporting from :'filename' +- where (salary < 2000); ++\set command '\\copy tab_progress_reporting from ' :'filename' 'where (salary < 2000)'; ++:command + + drop trigger check_after_tab_progress_reporting on tab_progress_reporting; + drop function notice_after_tab_progress_reporting(); +@@ -311,7 +320,8 @@ CREATE TABLE parted_si_p_odd PARTITION OF parted_si FOR VALUES IN (1); + -- https://postgr.es/m/18130-7a86a7356a75209d%40postgresql.org + -- https://postgr.es/m/257696.1695670946%40sss.pgh.pa.us + \set filename :abs_srcdir '/data/desc.data' +-COPY parted_si(id, data) FROM :'filename'; ++\set command '\\COPY parted_si(id, data) FROM ' :'filename'; ++:command + + -- An earlier bug (see commit b1ecb9b3fcf) could end up using a buffer from + -- the wrong partition. 
This test is *not* guaranteed to trigger that bug, but +diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql +index d759635068..d58e50dcc5 100644 +--- a/src/test/regress/sql/copy2.sql ++++ b/src/test/regress/sql/copy2.sql +@@ -365,8 +365,8 @@ copy check_con_tbl from stdin; + select * from check_con_tbl; + + -- test with RLS enabled. +-CREATE ROLE regress_rls_copy_user; +-CREATE ROLE regress_rls_copy_user_colperms; ++CREATE ROLE regress_rls_copy_user PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_rls_copy_user_colperms PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE TABLE rls_t1 (a int, b int, c int); + + COPY rls_t1 (a, b, c) from stdin; +diff --git a/src/test/regress/sql/create_function_sql.sql b/src/test/regress/sql/create_function_sql.sql +index 89e9af3a49..2b86fe2285 100644 +--- a/src/test/regress/sql/create_function_sql.sql ++++ b/src/test/regress/sql/create_function_sql.sql +@@ -6,7 +6,7 @@ + + -- All objects made in this test are in temp_func_test schema + +-CREATE USER regress_unpriv_user; ++CREATE USER regress_unpriv_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + + CREATE SCHEMA temp_func_test; + GRANT ALL ON SCHEMA temp_func_test TO public; +diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql +index d49ce9f300..47fa813bc8 100644 +--- a/src/test/regress/sql/create_index.sql ++++ b/src/test/regress/sql/create_index.sql +@@ -71,7 +71,8 @@ CREATE TABLE fast_emp4000 ( + ); + + \set filename :abs_srcdir '/data/rect.data' +-COPY slow_emp4000 FROM :'filename'; ++\set command '\\copy slow_emp4000 FROM ' :'filename'; ++:command + + INSERT INTO fast_emp4000 SELECT * FROM slow_emp4000; + +@@ -269,7 +270,8 @@ CREATE TABLE array_index_op_test ( + ); + + \set filename :abs_srcdir '/data/array.data' +-COPY array_index_op_test FROM :'filename'; ++\set command '\\copy array_index_op_test FROM ' :'filename'; ++:command + ANALYZE array_index_op_test; + + SELECT * FROM array_index_op_test WHERE i = '{NULL}' 
ORDER BY seqno; +@@ -1246,7 +1248,7 @@ END; + REINDEX SCHEMA CONCURRENTLY schema_to_reindex; + + -- Failure for unauthorized user +-CREATE ROLE regress_reindexuser NOLOGIN; ++CREATE ROLE regress_reindexuser NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION ROLE regress_reindexuser; + REINDEX SCHEMA schema_to_reindex; + -- Permission failures with toast tables and indexes (pg_authid here) +diff --git a/src/test/regress/sql/create_procedure.sql b/src/test/regress/sql/create_procedure.sql +index 069a3727ce..faeeb3f744 100644 +--- a/src/test/regress/sql/create_procedure.sql ++++ b/src/test/regress/sql/create_procedure.sql +@@ -255,7 +255,7 @@ DROP PROCEDURE nonexistent(); + + -- privileges + +-CREATE USER regress_cp_user1; ++CREATE USER regress_cp_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER; + GRANT INSERT ON cp_test TO regress_cp_user1; + REVOKE EXECUTE ON PROCEDURE ptest1(text) FROM PUBLIC; + SET ROLE regress_cp_user1; +diff --git a/src/test/regress/sql/create_role.sql b/src/test/regress/sql/create_role.sql +index 4491a28a8a..3045434865 100644 +--- a/src/test/regress/sql/create_role.sql ++++ b/src/test/regress/sql/create_role.sql +@@ -1,20 +1,20 @@ + -- ok, superuser can create users with any set of privileges +-CREATE ROLE regress_role_super SUPERUSER; +-CREATE ROLE regress_role_admin CREATEDB CREATEROLE REPLICATION BYPASSRLS; ++CREATE ROLE regress_role_super SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_role_admin CREATEDB CREATEROLE REPLICATION BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER; + GRANT CREATE ON DATABASE regression TO regress_role_admin WITH GRANT OPTION; +-CREATE ROLE regress_role_limited_admin CREATEROLE; +-CREATE ROLE regress_role_normal; ++CREATE ROLE regress_role_limited_admin CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_role_normal PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- fail, CREATEROLE user can't give away role attributes without having them + SET SESSION AUTHORIZATION 
regress_role_limited_admin; +-CREATE ROLE regress_nosuch_superuser SUPERUSER; +-CREATE ROLE regress_nosuch_replication_bypassrls REPLICATION BYPASSRLS; +-CREATE ROLE regress_nosuch_replication REPLICATION; +-CREATE ROLE regress_nosuch_bypassrls BYPASSRLS; +-CREATE ROLE regress_nosuch_createdb CREATEDB; ++CREATE ROLE regress_nosuch_superuser SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_nosuch_replication_bypassrls REPLICATION BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_nosuch_replication REPLICATION PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_nosuch_bypassrls BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_nosuch_createdb CREATEDB PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- ok, can create a role without any special attributes +-CREATE ROLE regress_role_limited; ++CREATE ROLE regress_role_limited PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- fail, can't give it in any of the restricted attributes + ALTER ROLE regress_role_limited SUPERUSER; +@@ -25,10 +25,10 @@ DROP ROLE regress_role_limited; + + -- ok, can give away these role attributes if you have them + SET SESSION AUTHORIZATION regress_role_admin; +-CREATE ROLE regress_replication_bypassrls REPLICATION BYPASSRLS; +-CREATE ROLE regress_replication REPLICATION; +-CREATE ROLE regress_bypassrls BYPASSRLS; +-CREATE ROLE regress_createdb CREATEDB; ++CREATE ROLE regress_replication_bypassrls REPLICATION BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_replication REPLICATION PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_bypassrls BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_createdb CREATEDB PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- ok, can toggle these role attributes off and on if you have them + ALTER ROLE regress_replication NOREPLICATION; +@@ -43,52 +43,52 @@ ALTER ROLE regress_createdb SUPERUSER; + ALTER ROLE regress_createdb NOSUPERUSER; + + -- ok, having CREATEROLE is enough to 
create users with these privileges +-CREATE ROLE regress_createrole CREATEROLE NOINHERIT; ++CREATE ROLE regress_createrole CREATEROLE NOINHERIT PASSWORD NEON_PASSWORD_PLACEHOLDER; + GRANT CREATE ON DATABASE regression TO regress_createrole WITH GRANT OPTION; +-CREATE ROLE regress_login LOGIN; +-CREATE ROLE regress_inherit INHERIT; +-CREATE ROLE regress_connection_limit CONNECTION LIMIT 5; +-CREATE ROLE regress_encrypted_password ENCRYPTED PASSWORD 'foo'; +-CREATE ROLE regress_password_null PASSWORD NULL; ++CREATE ROLE regress_login LOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_inherit INHERIT PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_connection_limit CONNECTION LIMIT 5 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_encrypted_password ENCRYPTED PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_password_null PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- ok, backwards compatible noise words should be ignored +-CREATE ROLE regress_noiseword SYSID 12345; ++CREATE ROLE regress_noiseword SYSID 12345 PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- fail, cannot grant membership in superuser role +-CREATE ROLE regress_nosuch_super IN ROLE regress_role_super; ++CREATE ROLE regress_nosuch_super IN ROLE regress_role_super PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- fail, database owner cannot have members +-CREATE ROLE regress_nosuch_dbowner IN ROLE pg_database_owner; ++CREATE ROLE regress_nosuch_dbowner IN ROLE pg_database_owner PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- ok, can grant other users into a role + CREATE ROLE regress_inroles ROLE + regress_role_super, regress_createdb, regress_createrole, regress_login, +- regress_inherit, regress_connection_limit, regress_encrypted_password, regress_password_null; ++ regress_inherit, regress_connection_limit, regress_encrypted_password, regress_password_null PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- fail, cannot grant a role into itself +-CREATE ROLE regress_nosuch_recursive ROLE 
regress_nosuch_recursive; ++CREATE ROLE regress_nosuch_recursive ROLE regress_nosuch_recursive PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- ok, can grant other users into a role with admin option + CREATE ROLE regress_adminroles ADMIN + regress_role_super, regress_createdb, regress_createrole, regress_login, +- regress_inherit, regress_connection_limit, regress_encrypted_password, regress_password_null; ++ regress_inherit, regress_connection_limit, regress_encrypted_password, regress_password_null PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- fail, cannot grant a role into itself with admin option +-CREATE ROLE regress_nosuch_admin_recursive ADMIN regress_nosuch_admin_recursive; ++CREATE ROLE regress_nosuch_admin_recursive ADMIN regress_nosuch_admin_recursive PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- fail, regress_createrole does not have CREATEDB privilege + SET SESSION AUTHORIZATION regress_createrole; + CREATE DATABASE regress_nosuch_db; + + -- ok, regress_createrole can create new roles +-CREATE ROLE regress_plainrole; ++CREATE ROLE regress_plainrole PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- ok, roles with CREATEROLE can create new roles with it +-CREATE ROLE regress_rolecreator CREATEROLE; ++CREATE ROLE regress_rolecreator CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- ok, roles with CREATEROLE can create new roles with different role + -- attributes, including CREATEROLE +-CREATE ROLE regress_hasprivs CREATEROLE LOGIN INHERIT CONNECTION LIMIT 5; ++CREATE ROLE regress_hasprivs CREATEROLE LOGIN INHERIT CONNECTION LIMIT 5 PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- ok, we should be able to modify a role we created + COMMENT ON ROLE regress_hasprivs IS 'some comment'; +@@ -123,7 +123,7 @@ REASSIGN OWNED BY regress_tenant TO regress_createrole; + + -- ok, create a role with a value for createrole_self_grant + SET createrole_self_grant = 'set, inherit'; +-CREATE ROLE regress_tenant2; ++CREATE ROLE regress_tenant2 PASSWORD NEON_PASSWORD_PLACEHOLDER; + GRANT 
CREATE ON DATABASE regression TO regress_tenant2; + + -- ok, regress_tenant2 can create objects within the database +@@ -150,16 +150,16 @@ ALTER TABLE tenant2_table OWNER TO regress_tenant2; + DROP TABLE tenant2_table; + + -- fail, CREATEROLE is not enough to create roles in privileged roles +-CREATE ROLE regress_read_all_data IN ROLE pg_read_all_data; +-CREATE ROLE regress_write_all_data IN ROLE pg_write_all_data; +-CREATE ROLE regress_monitor IN ROLE pg_monitor; +-CREATE ROLE regress_read_all_settings IN ROLE pg_read_all_settings; +-CREATE ROLE regress_read_all_stats IN ROLE pg_read_all_stats; +-CREATE ROLE regress_stat_scan_tables IN ROLE pg_stat_scan_tables; +-CREATE ROLE regress_read_server_files IN ROLE pg_read_server_files; +-CREATE ROLE regress_write_server_files IN ROLE pg_write_server_files; +-CREATE ROLE regress_execute_server_program IN ROLE pg_execute_server_program; +-CREATE ROLE regress_signal_backend IN ROLE pg_signal_backend; ++CREATE ROLE regress_read_all_data PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_all_data; ++CREATE ROLE regress_write_all_data PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_write_all_data; ++CREATE ROLE regress_monitor PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_monitor; ++CREATE ROLE regress_read_all_settings PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_all_settings; ++CREATE ROLE regress_read_all_stats PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_all_stats; ++CREATE ROLE regress_stat_scan_tables PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_stat_scan_tables; ++CREATE ROLE regress_read_server_files PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_read_server_files; ++CREATE ROLE regress_write_server_files PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_write_server_files; ++CREATE ROLE regress_execute_server_program PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_execute_server_program; ++CREATE ROLE regress_signal_backend PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_signal_backend; + + -- fail, role still owns 
database objects + DROP ROLE regress_tenant; +diff --git a/src/test/regress/sql/create_schema.sql b/src/test/regress/sql/create_schema.sql +index 1b7064247a..be5b662ce1 100644 +--- a/src/test/regress/sql/create_schema.sql ++++ b/src/test/regress/sql/create_schema.sql +@@ -4,7 +4,7 @@ + + -- Schema creation with elements. + +-CREATE ROLE regress_create_schema_role SUPERUSER; ++CREATE ROLE regress_create_schema_role SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- Cases where schema creation fails as objects are qualified with a schema + -- that does not match with what's expected. +diff --git a/src/test/regress/sql/create_view.sql b/src/test/regress/sql/create_view.sql +index 3a78be1b0c..617d2dc8d6 100644 +--- a/src/test/regress/sql/create_view.sql ++++ b/src/test/regress/sql/create_view.sql +@@ -23,7 +23,8 @@ CREATE TABLE real_city ( + ); + + \set filename :abs_srcdir '/data/real_city.data' +-COPY real_city FROM :'filename'; ++\set command '\\copy real_city FROM ' :'filename'; ++:command + ANALYZE real_city; + + SELECT * +diff --git a/src/test/regress/sql/database.sql b/src/test/regress/sql/database.sql +index 0367c0e37a..a23b98c4bd 100644 +--- a/src/test/regress/sql/database.sql ++++ b/src/test/regress/sql/database.sql +@@ -1,8 +1,6 @@ + CREATE DATABASE regression_tbd + ENCODING utf8 LC_COLLATE "C" LC_CTYPE "C" TEMPLATE template0; + ALTER DATABASE regression_tbd RENAME TO regression_utf8; +-ALTER DATABASE regression_utf8 SET TABLESPACE regress_tblspace; +-ALTER DATABASE regression_utf8 RESET TABLESPACE; + ALTER DATABASE regression_utf8 CONNECTION_LIMIT 123; + + -- Test PgDatabaseToastTable. Doing this with GRANT would be slow. 
+diff --git a/src/test/regress/sql/dependency.sql b/src/test/regress/sql/dependency.sql +index 2559c62d0b..06c3aa1a36 100644 +--- a/src/test/regress/sql/dependency.sql ++++ b/src/test/regress/sql/dependency.sql +@@ -2,10 +2,10 @@ + -- DEPENDENCIES + -- + +-CREATE USER regress_dep_user; +-CREATE USER regress_dep_user2; +-CREATE USER regress_dep_user3; +-CREATE GROUP regress_dep_group; ++CREATE USER regress_dep_user PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_dep_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_dep_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE GROUP regress_dep_group PASSWORD NEON_PASSWORD_PLACEHOLDER; + + CREATE TABLE deptest (f1 serial primary key, f2 text); + +@@ -45,9 +45,9 @@ DROP TABLE deptest; + DROP USER regress_dep_user3; + + -- Test DROP OWNED +-CREATE USER regress_dep_user0; +-CREATE USER regress_dep_user1; +-CREATE USER regress_dep_user2; ++CREATE USER regress_dep_user0 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_dep_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_dep_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION AUTHORIZATION regress_dep_user0; + -- permission denied + DROP OWNED BY regress_dep_user1; +diff --git a/src/test/regress/sql/drop_if_exists.sql b/src/test/regress/sql/drop_if_exists.sql +index ac6168b91f..4270062ec7 100644 +--- a/src/test/regress/sql/drop_if_exists.sql ++++ b/src/test/regress/sql/drop_if_exists.sql +@@ -86,9 +86,9 @@ DROP DOMAIN test_domain_exists; + --- role/user/group + --- + +-CREATE USER regress_test_u1; +-CREATE ROLE regress_test_r1; +-CREATE GROUP regress_test_g1; ++CREATE USER regress_test_u1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_test_r1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE GROUP regress_test_g1 PASSWORD NEON_PASSWORD_PLACEHOLDER; + + DROP USER regress_test_u2; + +diff --git a/src/test/regress/sql/equivclass.sql b/src/test/regress/sql/equivclass.sql +index 247b0a3105..bf018fd3a1 100644 +--- 
a/src/test/regress/sql/equivclass.sql ++++ b/src/test/regress/sql/equivclass.sql +@@ -230,7 +230,7 @@ set enable_mergejoin = off; + alter table ec1 enable row level security; + create policy p1 on ec1 using (f1 < '5'::int8alias1); + +-create user regress_user_ectest; ++create user regress_user_ectest PASSWORD NEON_PASSWORD_PLACEHOLDER; + grant select on ec0 to regress_user_ectest; + grant select on ec1 to regress_user_ectest; + +diff --git a/src/test/regress/sql/event_trigger.sql b/src/test/regress/sql/event_trigger.sql +index 1aeaddbe71..89a410ec4a 100644 +--- a/src/test/regress/sql/event_trigger.sql ++++ b/src/test/regress/sql/event_trigger.sql +@@ -86,7 +86,7 @@ create event trigger regress_event_trigger2 on ddl_command_start + comment on event trigger regress_event_trigger is 'test comment'; + + -- drop as non-superuser should fail +-create role regress_evt_user; ++create role regress_evt_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + set role regress_evt_user; + create event trigger regress_event_trigger_noperms on ddl_command_start + execute procedure test_event_trigger(); +diff --git a/src/test/regress/sql/foreign_data.sql b/src/test/regress/sql/foreign_data.sql +index aa147b14a9..370e0dd570 100644 +--- a/src/test/regress/sql/foreign_data.sql ++++ b/src/test/regress/sql/foreign_data.sql +@@ -22,14 +22,14 @@ DROP ROLE IF EXISTS regress_foreign_data_user, regress_test_role, regress_test_r + + RESET client_min_messages; + +-CREATE ROLE regress_foreign_data_user LOGIN SUPERUSER; ++CREATE ROLE regress_foreign_data_user LOGIN SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION AUTHORIZATION 'regress_foreign_data_user'; + +-CREATE ROLE regress_test_role; +-CREATE ROLE regress_test_role2; +-CREATE ROLE regress_test_role_super SUPERUSER; +-CREATE ROLE regress_test_indirect; +-CREATE ROLE regress_unprivileged_role; ++CREATE ROLE regress_test_role PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_test_role2 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE 
regress_test_role_super SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_test_indirect PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_unprivileged_role PASSWORD NEON_PASSWORD_PLACEHOLDER; + + CREATE FOREIGN DATA WRAPPER dummy; + COMMENT ON FOREIGN DATA WRAPPER dummy IS 'useless'; +diff --git a/src/test/regress/sql/foreign_key.sql b/src/test/regress/sql/foreign_key.sql +index 22e177f89b..7138d5e1d4 100644 +--- a/src/test/regress/sql/foreign_key.sql ++++ b/src/test/regress/sql/foreign_key.sql +@@ -1418,7 +1418,7 @@ ALTER TABLE fk_partitioned_fk ATTACH PARTITION fk_partitioned_fk_2 + -- leave these tables around intentionally + + -- test the case when the referenced table is owned by a different user +-create role regress_other_partitioned_fk_owner; ++create role regress_other_partitioned_fk_owner PASSWORD NEON_PASSWORD_PLACEHOLDER; + grant references on fk_notpartitioned_pk to regress_other_partitioned_fk_owner; + set role regress_other_partitioned_fk_owner; + create table other_partitioned_fk(a int, b int) partition by list (a); +diff --git a/src/test/regress/sql/generated.sql b/src/test/regress/sql/generated.sql +index 298f6b3aa8..f058913ae0 100644 +--- a/src/test/regress/sql/generated.sql ++++ b/src/test/regress/sql/generated.sql +@@ -263,7 +263,7 @@ ALTER TABLE gtest10a DROP COLUMN b; + INSERT INTO gtest10a (a) VALUES (1); + + -- privileges +-CREATE USER regress_user11; ++CREATE USER regress_user11 PASSWORD NEON_PASSWORD_PLACEHOLDER; + + CREATE TABLE gtest11s (a int PRIMARY KEY, b int, c int GENERATED ALWAYS AS (b * 2) STORED); + INSERT INTO gtest11s VALUES (1, 10), (2, 20); +diff --git a/src/test/regress/sql/guc.sql b/src/test/regress/sql/guc.sql +index dc79761955..a9ead75349 100644 +--- a/src/test/regress/sql/guc.sql ++++ b/src/test/regress/sql/guc.sql +@@ -188,7 +188,7 @@ PREPARE foo AS SELECT 1; + LISTEN foo_event; + SET vacuum_cost_delay = 13; + CREATE TEMP TABLE tmp_foo (data text) ON COMMIT DELETE ROWS; +-CREATE ROLE 
regress_guc_user; ++CREATE ROLE regress_guc_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION AUTHORIZATION regress_guc_user; + -- look changes + SELECT pg_listening_channels(); +diff --git a/src/test/regress/sql/hash_index.sql b/src/test/regress/sql/hash_index.sql +index 527024f710..de49c0b85f 100644 +--- a/src/test/regress/sql/hash_index.sql ++++ b/src/test/regress/sql/hash_index.sql +@@ -26,10 +26,14 @@ CREATE TABLE hash_f8_heap ( + ); + + \set filename :abs_srcdir '/data/hash.data' +-COPY hash_i4_heap FROM :'filename'; +-COPY hash_name_heap FROM :'filename'; +-COPY hash_txt_heap FROM :'filename'; +-COPY hash_f8_heap FROM :'filename'; ++\set command '\\copy hash_i4_heap FROM ' :'filename'; ++:command ++\set command '\\copy hash_name_heap FROM ' :'filename'; ++:command ++\set command '\\copy hash_txt_heap FROM ' :'filename'; ++:command ++\set command '\\copy hash_f8_heap FROM ' :'filename'; ++:command + + -- the data in this file has a lot of duplicates in the index key + -- fields, leading to long bucket chains and lots of table expansion. 
+diff --git a/src/test/regress/sql/identity.sql b/src/test/regress/sql/identity.sql +index 91d2e443b4..241c93f373 100644 +--- a/src/test/regress/sql/identity.sql ++++ b/src/test/regress/sql/identity.sql +@@ -287,7 +287,7 @@ ALTER TABLE itest7 ALTER COLUMN a RESTART; + ALTER TABLE itest7 ALTER COLUMN a DROP IDENTITY; + + -- privileges +-CREATE USER regress_identity_user1; ++CREATE USER regress_identity_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE TABLE itest8 (a int GENERATED ALWAYS AS IDENTITY, b text); + GRANT SELECT, INSERT ON itest8 TO regress_identity_user1; + SET ROLE regress_identity_user1; +diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql +index fe699c54d5..bdd5993f45 100644 +--- a/src/test/regress/sql/inherit.sql ++++ b/src/test/regress/sql/inherit.sql +@@ -950,7 +950,7 @@ create index on permtest_parent (left(c, 3)); + insert into permtest_parent + select 1, 'a', left(fipshash(i::text), 5) from generate_series(0, 100) i; + analyze permtest_parent; +-create role regress_no_child_access; ++create role regress_no_child_access PASSWORD NEON_PASSWORD_PLACEHOLDER; + revoke all on permtest_grandchild from regress_no_child_access; + grant select on permtest_parent to regress_no_child_access; + set session authorization regress_no_child_access; +diff --git a/src/test/regress/sql/insert.sql b/src/test/regress/sql/insert.sql +index 2b086eeb6d..913d8a0aed 100644 +--- a/src/test/regress/sql/insert.sql ++++ b/src/test/regress/sql/insert.sql +@@ -513,7 +513,7 @@ drop table mlparted5; + create table key_desc (a int, b int) partition by list ((a+0)); + create table key_desc_1 partition of key_desc for values in (1) partition by range (b); + +-create user regress_insert_other_user; ++create user regress_insert_other_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + grant select (a) on key_desc_1 to regress_insert_other_user; + grant insert on key_desc to regress_insert_other_user; + +@@ -597,7 +597,7 @@ insert into brtrigpartcon1 values (1, 
'hi there'); + -- check that the message shows the appropriate column description in a + -- situation where the partitioned table is not the primary ModifyTable node + create table inserttest3 (f1 text default 'foo', f2 text default 'bar', f3 int); +-create role regress_coldesc_role; ++create role regress_coldesc_role PASSWORD NEON_PASSWORD_PLACEHOLDER; + grant insert on inserttest3 to regress_coldesc_role; + grant insert on brtrigpartcon to regress_coldesc_role; + revoke select on brtrigpartcon from regress_coldesc_role; +diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql +index 6dae715afd..aa320ba7be 100644 +--- a/src/test/regress/sql/jsonb.sql ++++ b/src/test/regress/sql/jsonb.sql +@@ -6,7 +6,8 @@ CREATE TABLE testjsonb ( + ); + + \set filename :abs_srcdir '/data/jsonb.data' +-COPY testjsonb FROM :'filename'; ++\set command '\\copy testjsonb FROM ' :'filename'; ++:command + + -- Strings. + SELECT '""'::jsonb; -- OK. +diff --git a/src/test/regress/sql/largeobject.sql b/src/test/regress/sql/largeobject.sql +index a4aee02e3a..8839c9496a 100644 +--- a/src/test/regress/sql/largeobject.sql ++++ b/src/test/regress/sql/largeobject.sql +@@ -10,7 +10,7 @@ + SET bytea_output TO escape; + + -- Test ALTER LARGE OBJECT OWNER +-CREATE ROLE regress_lo_user; ++CREATE ROLE regress_lo_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + SELECT lo_create(42); + ALTER LARGE OBJECT 42 OWNER TO regress_lo_user; + +@@ -189,7 +189,8 @@ SELECT lo_unlink(loid) from lotest_stash_values; + TRUNCATE lotest_stash_values; + + \set filename :abs_srcdir '/data/tenk.data' +-INSERT INTO lotest_stash_values (loid) SELECT lo_import(:'filename'); ++\lo_import :filename ++INSERT INTO lotest_stash_values (loid) VALUES (:LASTOID); + + BEGIN; + UPDATE lotest_stash_values SET fd=lo_open(loid, CAST(x'20000' | x'40000' AS integer)); +@@ -219,8 +220,8 @@ SELECT lo_close(fd) FROM lotest_stash_values; + END; + + \set filename :abs_builddir '/results/lotest.txt' +-SELECT lo_export(loid, 
:'filename') FROM lotest_stash_values; +- ++SELECT loid FROM lotest_stash_values \gset ++\lo_export :loid, :filename + \lo_import :filename + + \set newloid :LASTOID +diff --git a/src/test/regress/sql/lock.sql b/src/test/regress/sql/lock.sql +index b88488c6d0..78b31e6dd3 100644 +--- a/src/test/regress/sql/lock.sql ++++ b/src/test/regress/sql/lock.sql +@@ -19,7 +19,7 @@ CREATE VIEW lock_view3 AS SELECT * from lock_view2; + CREATE VIEW lock_view4 AS SELECT (select a from lock_tbl1a limit 1) from lock_tbl1; + CREATE VIEW lock_view5 AS SELECT * from lock_tbl1 where a in (select * from lock_tbl1a); + CREATE VIEW lock_view6 AS SELECT * from (select * from lock_tbl1) sub; +-CREATE ROLE regress_rol_lock1; ++CREATE ROLE regress_rol_lock1 PASSWORD NEON_PASSWORD_PLACEHOLDER; + ALTER ROLE regress_rol_lock1 SET search_path = lock_schema1; + GRANT USAGE ON SCHEMA lock_schema1 TO regress_rol_lock1; + +diff --git a/src/test/regress/sql/matview.sql b/src/test/regress/sql/matview.sql +index 235123de1e..58e73cec5d 100644 +--- a/src/test/regress/sql/matview.sql ++++ b/src/test/regress/sql/matview.sql +@@ -209,7 +209,7 @@ SELECT * FROM mvtest_mv_v; + DROP TABLE mvtest_v CASCADE; + + -- make sure running as superuser works when MV owned by another role (bug #11208) +-CREATE ROLE regress_user_mvtest; ++CREATE ROLE regress_user_mvtest PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET ROLE regress_user_mvtest; + -- this test case also checks for ambiguity in the queries issued by + -- refresh_by_match_merge(), by choosing column names that intentionally +@@ -264,7 +264,7 @@ ROLLBACK; + + -- INSERT privileges if relation owner is not allowed to insert. 
+ CREATE SCHEMA matview_schema; +-CREATE USER regress_matview_user; ++CREATE USER regress_matview_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + ALTER DEFAULT PRIVILEGES FOR ROLE regress_matview_user + REVOKE INSERT ON TABLES FROM regress_matview_user; + GRANT ALL ON SCHEMA matview_schema TO public; +diff --git a/src/test/regress/sql/merge.sql b/src/test/regress/sql/merge.sql +index 2a220a248f..91a404d51e 100644 +--- a/src/test/regress/sql/merge.sql ++++ b/src/test/regress/sql/merge.sql +@@ -2,9 +2,9 @@ + -- MERGE + -- + +-CREATE USER regress_merge_privs; +-CREATE USER regress_merge_no_privs; +-CREATE USER regress_merge_none; ++CREATE USER regress_merge_privs PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_merge_no_privs PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_merge_none PASSWORD NEON_PASSWORD_PLACEHOLDER; + + DROP TABLE IF EXISTS target; + DROP TABLE IF EXISTS source; +diff --git a/src/test/regress/sql/misc.sql b/src/test/regress/sql/misc.sql +index 165a2e175f..08d7096e2c 100644 +--- a/src/test/regress/sql/misc.sql ++++ b/src/test/regress/sql/misc.sql +@@ -74,22 +74,26 @@ DROP TABLE tmp; + -- copy + -- + \set filename :abs_builddir '/results/onek.data' +-COPY onek TO :'filename'; ++\set command '\\copy onek TO ' :'filename'; ++:command + + CREATE TEMP TABLE onek_copy (LIKE onek); + +-COPY onek_copy FROM :'filename'; ++\set command '\\copy onek_copy FROM ' :'filename'; ++:command + + SELECT * FROM onek EXCEPT ALL SELECT * FROM onek_copy; + + SELECT * FROM onek_copy EXCEPT ALL SELECT * FROM onek; + + \set filename :abs_builddir '/results/stud_emp.data' +-COPY BINARY stud_emp TO :'filename'; ++\set command '\\COPY BINARY stud_emp TO ' :'filename'; ++:command + + CREATE TEMP TABLE stud_emp_copy (LIKE stud_emp); + +-COPY BINARY stud_emp_copy FROM :'filename'; ++\set command '\\COPY BINARY stud_emp_copy FROM ' :'filename'; ++:command + + SELECT * FROM stud_emp_copy; + +diff --git a/src/test/regress/sql/misc_functions.sql 
b/src/test/regress/sql/misc_functions.sql +index b57f01f3e9..3e05aa6400 100644 +--- a/src/test/regress/sql/misc_functions.sql ++++ b/src/test/regress/sql/misc_functions.sql +@@ -82,7 +82,7 @@ SELECT pg_log_backend_memory_contexts(pg_backend_pid()); + SELECT pg_log_backend_memory_contexts(pid) FROM pg_stat_activity + WHERE backend_type = 'checkpointer'; + +-CREATE ROLE regress_log_memory; ++CREATE ROLE regress_log_memory PASSWORD NEON_PASSWORD_PLACEHOLDER; + + SELECT has_function_privilege('regress_log_memory', + 'pg_log_backend_memory_contexts(integer)', 'EXECUTE'); -- no +@@ -169,7 +169,7 @@ select count(*) > 0 from + -- + -- Test replication slot directory functions + -- +-CREATE ROLE regress_slot_dir_funcs; ++CREATE ROLE regress_slot_dir_funcs PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- Not available by default. + SELECT has_function_privilege('regress_slot_dir_funcs', + 'pg_ls_logicalsnapdir()', 'EXECUTE'); +diff --git a/src/test/regress/sql/object_address.sql b/src/test/regress/sql/object_address.sql +index 1a6c61f49d..1c31ac6a53 100644 +--- a/src/test/regress/sql/object_address.sql ++++ b/src/test/regress/sql/object_address.sql +@@ -7,7 +7,7 @@ SET client_min_messages TO 'warning'; + DROP ROLE IF EXISTS regress_addr_user; + RESET client_min_messages; + +-CREATE USER regress_addr_user; ++CREATE USER regress_addr_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- Test generic object addressing/identification functions + CREATE SCHEMA addr_nsp; +diff --git a/src/test/regress/sql/password.sql b/src/test/regress/sql/password.sql +index 53e86b0b6c..f07cf1ec54 100644 +--- a/src/test/regress/sql/password.sql ++++ b/src/test/regress/sql/password.sql +@@ -10,11 +10,11 @@ SET password_encryption = 'scram-sha-256'; -- ok + + -- consistency of password entries + SET password_encryption = 'md5'; +-CREATE ROLE regress_passwd1 PASSWORD 'role_pwd1'; +-CREATE ROLE regress_passwd2 PASSWORD 'role_pwd2'; ++CREATE ROLE regress_passwd1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE 
regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET password_encryption = 'scram-sha-256'; +-CREATE ROLE regress_passwd3 PASSWORD 'role_pwd3'; +-CREATE ROLE regress_passwd4 PASSWORD NULL; ++CREATE ROLE regress_passwd3 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- check list of created entries + -- +@@ -42,14 +42,14 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2; + SET password_encryption = 'md5'; + + -- encrypt with MD5 +-ALTER ROLE regress_passwd2 PASSWORD 'foo'; ++ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- already encrypted, use as they are + ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70'; + ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo='; + + SET password_encryption = 'scram-sha-256'; + -- create SCRAM secret +-ALTER ROLE regress_passwd4 PASSWORD 'foo'; ++ALTER ROLE regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER; + -- already encrypted with MD5, use as it is + CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023'; + +diff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql +index 3f68cafcd1..004b26831d 100644 +--- a/src/test/regress/sql/privileges.sql ++++ b/src/test/regress/sql/privileges.sql +@@ -24,18 +24,18 @@ RESET client_min_messages; + + -- test proper begins here + +-CREATE USER regress_priv_user1; +-CREATE USER regress_priv_user2; +-CREATE USER regress_priv_user3; +-CREATE USER regress_priv_user4; +-CREATE USER regress_priv_user5; +-CREATE USER regress_priv_user5; -- duplicate +-CREATE USER regress_priv_user6; +-CREATE USER regress_priv_user7; +-CREATE USER regress_priv_user8; +-CREATE USER regress_priv_user9; +-CREATE USER regress_priv_user10; +-CREATE ROLE regress_priv_role; ++CREATE USER regress_priv_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE 
USER regress_priv_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user4 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER; -- duplicate ++CREATE USER regress_priv_user6 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user7 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user8 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user9 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user10 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_priv_role PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- circular ADMIN OPTION grants should be disallowed + GRANT regress_priv_user1 TO regress_priv_user2 WITH ADMIN OPTION; +@@ -84,11 +84,11 @@ DROP ROLE regress_priv_user5; -- should fail, dependency + DROP ROLE regress_priv_user1, regress_priv_user5; -- ok, despite order + + -- recreate the roles we just dropped +-CREATE USER regress_priv_user1; +-CREATE USER regress_priv_user2; +-CREATE USER regress_priv_user3; +-CREATE USER regress_priv_user4; +-CREATE USER regress_priv_user5; ++CREATE USER regress_priv_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user4 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_priv_user5 PASSWORD NEON_PASSWORD_PLACEHOLDER; + + GRANT pg_read_all_data TO regress_priv_user6; + GRANT pg_write_all_data TO regress_priv_user7; +@@ -130,8 +130,8 @@ DROP USER regress_priv_user10; + DROP USER regress_priv_user9; + DROP USER regress_priv_user8; + +-CREATE GROUP regress_priv_group1; +-CREATE GROUP regress_priv_group2 WITH ADMIN regress_priv_user1 USER regress_priv_user2; ++CREATE GROUP regress_priv_group1 PASSWORD 
NEON_PASSWORD_PLACEHOLDER; ++CREATE GROUP regress_priv_group2 WITH ADMIN regress_priv_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER USER regress_priv_user2; + + ALTER GROUP regress_priv_group1 ADD USER regress_priv_user4; + +@@ -1124,7 +1124,7 @@ SELECT has_table_privilege('regress_priv_user1', 'atest4', 'SELECT WITH GRANT OP + + -- security-restricted operations + \c - +-CREATE ROLE regress_sro_user; ++CREATE ROLE regress_sro_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- Check that index expressions and predicates are run as the table's owner + +@@ -1620,8 +1620,8 @@ DROP SCHEMA testns CASCADE; + -- Change owner of the schema & and rename of new schema owner + \c - + +-CREATE ROLE regress_schemauser1 superuser login; +-CREATE ROLE regress_schemauser2 superuser login; ++CREATE ROLE regress_schemauser1 superuser login PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_schemauser2 superuser login PASSWORD NEON_PASSWORD_PLACEHOLDER; + + SET SESSION ROLE regress_schemauser1; + CREATE SCHEMA testns; +@@ -1715,7 +1715,7 @@ DROP USER regress_priv_user8; -- does not exist + + + -- permissions with LOCK TABLE +-CREATE USER regress_locktable_user; ++CREATE USER regress_locktable_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE TABLE lock_table (a int); + + -- LOCK TABLE and SELECT permission +@@ -1803,7 +1803,7 @@ DROP USER regress_locktable_user; + -- switch to superuser + \c - + +-CREATE ROLE regress_readallstats; ++CREATE ROLE regress_readallstats PASSWORD NEON_PASSWORD_PLACEHOLDER; + + SELECT has_table_privilege('regress_readallstats','pg_backend_memory_contexts','SELECT'); -- no + SELECT has_table_privilege('regress_readallstats','pg_shmem_allocations','SELECT'); -- no +@@ -1823,10 +1823,10 @@ RESET ROLE; + DROP ROLE regress_readallstats; + + -- test role grantor machinery +-CREATE ROLE regress_group; +-CREATE ROLE regress_group_direct_manager; +-CREATE ROLE regress_group_indirect_manager; +-CREATE ROLE regress_group_member; ++CREATE ROLE regress_group PASSWORD 
NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_group_direct_manager PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_group_indirect_manager PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_group_member PASSWORD NEON_PASSWORD_PLACEHOLDER; + + GRANT regress_group TO regress_group_direct_manager WITH INHERIT FALSE, ADMIN TRUE; + GRANT regress_group_direct_manager TO regress_group_indirect_manager; +@@ -1848,9 +1848,9 @@ DROP ROLE regress_group_indirect_manager; + DROP ROLE regress_group_member; + + -- test SET and INHERIT options with object ownership changes +-CREATE ROLE regress_roleoption_protagonist; +-CREATE ROLE regress_roleoption_donor; +-CREATE ROLE regress_roleoption_recipient; ++CREATE ROLE regress_roleoption_protagonist PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_roleoption_donor PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_roleoption_recipient PASSWORD NEON_PASSWORD_PLACEHOLDER; + CREATE SCHEMA regress_roleoption; + GRANT CREATE, USAGE ON SCHEMA regress_roleoption TO PUBLIC; + GRANT regress_roleoption_donor TO regress_roleoption_protagonist WITH INHERIT TRUE, SET FALSE; +diff --git a/src/test/regress/sql/psql.sql b/src/test/regress/sql/psql.sql +index f3bc6cd07e..f1a2f58069 100644 +--- a/src/test/regress/sql/psql.sql ++++ b/src/test/regress/sql/psql.sql +@@ -496,7 +496,7 @@ select 1 where false; + \pset expanded off + + CREATE SCHEMA tableam_display; +-CREATE ROLE regress_display_role; ++CREATE ROLE regress_display_role PASSWORD NEON_PASSWORD_PLACEHOLDER; + ALTER SCHEMA tableam_display OWNER TO regress_display_role; + SET search_path TO tableam_display; + CREATE ACCESS METHOD heap_psql TYPE TABLE HANDLER heap_tableam_handler; +@@ -1174,7 +1174,7 @@ select 1/(15-unique2) from tenk1 order by unique2 limit 19; + \unset FETCH_COUNT + + create schema testpart; +-create role regress_partitioning_role; ++create role regress_partitioning_role PASSWORD NEON_PASSWORD_PLACEHOLDER; + + alter schema testpart owner to 
regress_partitioning_role; + +@@ -1285,7 +1285,7 @@ reset work_mem; + + -- check \df+ + -- we have to use functions with a predictable owner name, so make a role +-create role regress_psql_user superuser; ++create role regress_psql_user superuser PASSWORD NEON_PASSWORD_PLACEHOLDER; + begin; + set session authorization regress_psql_user; + +@@ -1431,11 +1431,14 @@ CREATE TEMPORARY TABLE reload_output( + ); + + SELECT 1 AS a \g :g_out_file +-COPY reload_output(line) FROM :'g_out_file'; ++\set command '\\COPY reload_output(line) FROM ' :'g_out_file'; ++:command + SELECT 2 AS b\; SELECT 3 AS c\; SELECT 4 AS d \g :g_out_file +-COPY reload_output(line) FROM :'g_out_file'; ++\set command '\\COPY reload_output(line) FROM ' :'g_out_file'; ++:command + COPY (SELECT 'foo') TO STDOUT \; COPY (SELECT 'bar') TO STDOUT \g :g_out_file +-COPY reload_output(line) FROM :'g_out_file'; ++\set command '\\COPY reload_output(line) FROM ' :'g_out_file'; ++:command + + SELECT line FROM reload_output ORDER BY lineno; + TRUNCATE TABLE reload_output; +@@ -1452,17 +1455,20 @@ SELECT 1 AS a\; SELECT 2 AS b\; SELECT 3 AS c; + -- COPY TO file + -- The data goes to :g_out_file and the status to :o_out_file + \set QUIET false +-COPY (SELECT unique1 FROM onek ORDER BY unique1 LIMIT 10) TO :'g_out_file'; ++\set command '\\COPY (SELECT unique1 FROM onek ORDER BY unique1 LIMIT 10) TO ' :'g_out_file'; ++:command + -- DML command status + UPDATE onek SET unique1 = unique1 WHERE false; + \set QUIET true + \o + + -- Check the contents of the files generated. 
+-COPY reload_output(line) FROM :'g_out_file'; ++\set command '\\COPY reload_output(line) FROM ' :'g_out_file'; ++:command + SELECT line FROM reload_output ORDER BY lineno; + TRUNCATE TABLE reload_output; +-COPY reload_output(line) FROM :'o_out_file'; ++\set command '\\COPY reload_output(line) FROM ' :'o_out_file'; ++:command + SELECT line FROM reload_output ORDER BY lineno; + TRUNCATE TABLE reload_output; + +@@ -1475,10 +1481,12 @@ COPY (SELECT 'foo2') TO STDOUT \; COPY (SELECT 'bar2') TO STDOUT \g :g_out_file + \o + + -- Check the contents of the files generated. +-COPY reload_output(line) FROM :'g_out_file'; ++\set command '\\COPY reload_output(line) FROM ' :'g_out_file'; ++:command + SELECT line FROM reload_output ORDER BY lineno; + TRUNCATE TABLE reload_output; +-COPY reload_output(line) FROM :'o_out_file'; ++\set command '\\COPY reload_output(line) FROM ' :'o_out_file'; ++:command + SELECT line FROM reload_output ORDER BY lineno; + + DROP TABLE reload_output; +@@ -1825,10 +1833,10 @@ DROP FUNCTION psql_error; + \dX "no.such.database"."no.such.schema"."no.such.extended.statistics" + + -- check \drg and \du +-CREATE ROLE regress_du_role0; +-CREATE ROLE regress_du_role1; +-CREATE ROLE regress_du_role2; +-CREATE ROLE regress_du_admin; ++CREATE ROLE regress_du_role0 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_du_role1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_du_role2 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_du_admin PASSWORD NEON_PASSWORD_PLACEHOLDER; + + GRANT regress_du_role0 TO regress_du_admin WITH ADMIN TRUE; + GRANT regress_du_role1 TO regress_du_admin WITH ADMIN TRUE; +diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql +index d5051a5e74..b32d729271 100644 +--- a/src/test/regress/sql/publication.sql ++++ b/src/test/regress/sql/publication.sql +@@ -1,9 +1,9 @@ + -- + -- PUBLICATION + -- +-CREATE ROLE regress_publication_user LOGIN SUPERUSER; +-CREATE ROLE 
regress_publication_user2; +-CREATE ROLE regress_publication_user_dummy LOGIN NOSUPERUSER; ++CREATE ROLE regress_publication_user LOGIN SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_publication_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_publication_user_dummy LOGIN NOSUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION AUTHORIZATION 'regress_publication_user'; + + -- suppress warning that depends on wal_level +@@ -801,7 +801,7 @@ DROP PUBLICATION testpub2; + DROP PUBLICATION testpub3; + + SET ROLE regress_publication_user; +-CREATE ROLE regress_publication_user3; ++CREATE ROLE regress_publication_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER; + GRANT regress_publication_user2 TO regress_publication_user3; + SET client_min_messages = 'ERROR'; + CREATE PUBLICATION testpub4 FOR TABLES IN SCHEMA pub_test; +diff --git a/src/test/regress/sql/regproc.sql b/src/test/regress/sql/regproc.sql +index de2aa881a8..41a675fd35 100644 +--- a/src/test/regress/sql/regproc.sql ++++ b/src/test/regress/sql/regproc.sql +@@ -4,7 +4,7 @@ + + /* If objects exist, return oids */ + +-CREATE ROLE regress_regrole_test; ++CREATE ROLE regress_regrole_test PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- without schemaname + +diff --git a/src/test/regress/sql/roleattributes.sql b/src/test/regress/sql/roleattributes.sql +index c961b2d730..0859b89c4f 100644 +--- a/src/test/regress/sql/roleattributes.sql ++++ b/src/test/regress/sql/roleattributes.sql +@@ -1,83 +1,83 @@ + -- default for superuser is false +-CREATE ROLE regress_test_def_superuser; ++CREATE ROLE regress_test_def_superuser PASSWORD NEON_PASSWORD_PLACEHOLDER; + +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_superuser'; +-CREATE ROLE regress_test_superuser WITH SUPERUSER; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, 
rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser'; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_superuser'; ++CREATE ROLE regress_test_superuser WITH SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser'; + ALTER ROLE regress_test_superuser WITH NOSUPERUSER; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser'; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser'; + ALTER ROLE regress_test_superuser WITH SUPERUSER; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_superuser'; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE 
rolname = 'regress_test_superuser'; + + -- default for inherit is true +-CREATE ROLE regress_test_def_inherit; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_inherit'; +-CREATE ROLE regress_test_inherit WITH NOINHERIT; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit'; ++CREATE ROLE regress_test_def_inherit PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_inherit'; ++CREATE ROLE regress_test_inherit WITH NOINHERIT PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit'; + ALTER ROLE regress_test_inherit WITH INHERIT; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit'; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit'; + ALTER ROLE regress_test_inherit WITH NOINHERIT; 
+-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit'; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_inherit'; + + -- default for create role is false +-CREATE ROLE regress_test_def_createrole; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_createrole'; +-CREATE ROLE regress_test_createrole WITH CREATEROLE; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole'; ++CREATE ROLE regress_test_def_createrole PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_createrole'; ++CREATE ROLE regress_test_createrole WITH CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole'; + ALTER ROLE regress_test_createrole WITH NOCREATEROLE; +-SELECT rolname, rolsuper, rolinherit, 
rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole'; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole'; + ALTER ROLE regress_test_createrole WITH CREATEROLE; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole'; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createrole'; + + -- default for create database is false +-CREATE ROLE regress_test_def_createdb; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_createdb'; +-CREATE ROLE regress_test_createdb WITH CREATEDB; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb'; ++CREATE ROLE regress_test_def_createdb PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 
'regress_test_def_createdb'; ++CREATE ROLE regress_test_createdb WITH CREATEDB PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb'; + ALTER ROLE regress_test_createdb WITH NOCREATEDB; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb'; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb'; + ALTER ROLE regress_test_createdb WITH CREATEDB; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb'; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_createdb'; + + -- default for can login is false for role +-CREATE ROLE regress_test_def_role_canlogin; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_role_canlogin'; +-CREATE ROLE regress_test_role_canlogin WITH LOGIN; +-SELECT rolname, rolsuper, rolinherit, 
rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin'; ++CREATE ROLE regress_test_def_role_canlogin PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_role_canlogin'; ++CREATE ROLE regress_test_role_canlogin WITH LOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin'; + ALTER ROLE regress_test_role_canlogin WITH NOLOGIN; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin'; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin'; + ALTER ROLE regress_test_role_canlogin WITH LOGIN; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin'; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, 
'(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_role_canlogin'; + + -- default for can login is true for user +-CREATE USER regress_test_def_user_canlogin; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_user_canlogin'; +-CREATE USER regress_test_user_canlogin WITH NOLOGIN; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin'; ++CREATE USER regress_test_def_user_canlogin PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_user_canlogin'; ++CREATE USER regress_test_user_canlogin WITH NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin'; + ALTER USER regress_test_user_canlogin WITH LOGIN; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin'; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, 
'(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin'; + ALTER USER regress_test_user_canlogin WITH NOLOGIN; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin'; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_user_canlogin'; + + -- default for replication is false +-CREATE ROLE regress_test_def_replication; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_replication'; +-CREATE ROLE regress_test_replication WITH REPLICATION; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication'; ++CREATE ROLE regress_test_def_replication PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_replication'; ++CREATE ROLE regress_test_replication WITH REPLICATION PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, 
'(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication'; + ALTER ROLE regress_test_replication WITH NOREPLICATION; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication'; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication'; + ALTER ROLE regress_test_replication WITH REPLICATION; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication'; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_replication'; + + -- default for bypassrls is false +-CREATE ROLE regress_test_def_bypassrls; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_bypassrls'; +-CREATE ROLE regress_test_bypassrls WITH BYPASSRLS; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls'; ++CREATE ROLE regress_test_def_bypassrls PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, 
rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_def_bypassrls'; ++CREATE ROLE regress_test_bypassrls WITH BYPASSRLS PASSWORD NEON_PASSWORD_PLACEHOLDER; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls'; + ALTER ROLE regress_test_bypassrls WITH NOBYPASSRLS; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls'; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls'; + ALTER ROLE regress_test_bypassrls WITH BYPASSRLS; +-SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, rolpassword, rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls'; ++SELECT rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication, rolbypassrls, rolconnlimit, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+/=]+)\$([a-zA-Z0-9+=/]+):([a-zA-Z0-9+/=]+)', '\1$\2:$:'), rolvaliduntil FROM pg_authid WHERE rolname = 'regress_test_bypassrls'; + + -- clean up roles + DROP ROLE regress_test_def_superuser; +diff --git a/src/test/regress/sql/rowsecurity.sql 
b/src/test/regress/sql/rowsecurity.sql +index dec7340538..cdbc03a5cc 100644 +--- a/src/test/regress/sql/rowsecurity.sql ++++ b/src/test/regress/sql/rowsecurity.sql +@@ -20,13 +20,13 @@ DROP SCHEMA IF EXISTS regress_rls_schema CASCADE; + RESET client_min_messages; + + -- initial setup +-CREATE USER regress_rls_alice NOLOGIN; +-CREATE USER regress_rls_bob NOLOGIN; +-CREATE USER regress_rls_carol NOLOGIN; +-CREATE USER regress_rls_dave NOLOGIN; +-CREATE USER regress_rls_exempt_user BYPASSRLS NOLOGIN; +-CREATE ROLE regress_rls_group1 NOLOGIN; +-CREATE ROLE regress_rls_group2 NOLOGIN; ++CREATE USER regress_rls_alice NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_rls_bob NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_rls_carol NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_rls_dave NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_rls_exempt_user BYPASSRLS NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_rls_group1 NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_rls_group2 NOLOGIN PASSWORD NEON_PASSWORD_PLACEHOLDER; + + GRANT regress_rls_group1 TO regress_rls_bob; + GRANT regress_rls_group2 TO regress_rls_carol; +@@ -2065,8 +2065,8 @@ SELECT count(*) = 0 FROM pg_depend + -- DROP OWNED BY testing + RESET SESSION AUTHORIZATION; + +-CREATE ROLE regress_rls_dob_role1; +-CREATE ROLE regress_rls_dob_role2; ++CREATE ROLE regress_rls_dob_role1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_rls_dob_role2 PASSWORD NEON_PASSWORD_PLACEHOLDER; + + CREATE TABLE dob_t1 (c1 int); + CREATE TABLE dob_t2 (c1 int) PARTITION BY RANGE (c1); +diff --git a/src/test/regress/sql/rules.sql b/src/test/regress/sql/rules.sql +index 8b7e255dcd..c58d095c05 100644 +--- a/src/test/regress/sql/rules.sql ++++ b/src/test/regress/sql/rules.sql +@@ -1356,7 +1356,7 @@ DROP TABLE ruletest2; + -- Test non-SELECT rule on security invoker view. + -- Should use view owner's permissions. 
+ -- +-CREATE USER regress_rule_user1; ++CREATE USER regress_rule_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER; + + CREATE TABLE ruletest_t1 (x int); + CREATE TABLE ruletest_t2 (x int); +diff --git a/src/test/regress/sql/security_label.sql b/src/test/regress/sql/security_label.sql +index 98e6a5f211..68c868fef2 100644 +--- a/src/test/regress/sql/security_label.sql ++++ b/src/test/regress/sql/security_label.sql +@@ -10,8 +10,8 @@ DROP ROLE IF EXISTS regress_seclabel_user2; + + RESET client_min_messages; + +-CREATE USER regress_seclabel_user1 WITH CREATEROLE; +-CREATE USER regress_seclabel_user2; ++CREATE USER regress_seclabel_user1 WITH CREATEROLE PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_seclabel_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER; + + CREATE TABLE seclabel_tbl1 (a int, b text); + CREATE TABLE seclabel_tbl2 (x int, y text); +diff --git a/src/test/regress/sql/select_into.sql b/src/test/regress/sql/select_into.sql +index 689c448cc2..223ceb1d75 100644 +--- a/src/test/regress/sql/select_into.sql ++++ b/src/test/regress/sql/select_into.sql +@@ -20,7 +20,7 @@ DROP TABLE sitmp1; + -- SELECT INTO and INSERT permission, if owner is not allowed to insert. 
+ -- + CREATE SCHEMA selinto_schema; +-CREATE USER regress_selinto_user; ++CREATE USER regress_selinto_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + ALTER DEFAULT PRIVILEGES FOR ROLE regress_selinto_user + REVOKE INSERT ON TABLES FROM regress_selinto_user; + GRANT ALL ON SCHEMA selinto_schema TO public; +diff --git a/src/test/regress/sql/select_views.sql b/src/test/regress/sql/select_views.sql +index e742f13699..7bd0255df8 100644 +--- a/src/test/regress/sql/select_views.sql ++++ b/src/test/regress/sql/select_views.sql +@@ -12,7 +12,7 @@ SELECT * FROM toyemp WHERE name = 'sharon'; + -- + -- Test for Leaky view scenario + -- +-CREATE ROLE regress_alice; ++CREATE ROLE regress_alice PASSWORD NEON_PASSWORD_PLACEHOLDER; + + CREATE FUNCTION f_leak (text) + RETURNS bool LANGUAGE 'plpgsql' COST 0.0000001 +diff --git a/src/test/regress/sql/sequence.sql b/src/test/regress/sql/sequence.sql +index 793f1415f6..ec07c1f193 100644 +--- a/src/test/regress/sql/sequence.sql ++++ b/src/test/regress/sql/sequence.sql +@@ -293,7 +293,7 @@ ROLLBACK; + + -- privileges tests + +-CREATE USER regress_seq_user; ++CREATE USER regress_seq_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + + -- nextval + BEGIN; +diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql +index 1e21e55c6d..2251f50c5e 100644 +--- a/src/test/regress/sql/stats.sql ++++ b/src/test/regress/sql/stats.sql +@@ -622,23 +622,6 @@ SELECT :io_sum_shared_after_writes > :io_sum_shared_before_writes; + SELECT current_setting('fsync') = 'off' + OR :io_sum_shared_after_fsyncs > :io_sum_shared_before_fsyncs; + +--- Change the tablespace so that the table is rewritten directly, then SELECT +--- from it to cause it to be read back into shared buffers. +-SELECT sum(reads) AS io_sum_shared_before_reads +- FROM pg_stat_io WHERE context = 'normal' AND object = 'relation' \gset +--- Do this in a transaction to prevent spurious failures due to concurrent accesses to our newly +--- rewritten table, e.g. by autovacuum. 
+-BEGIN; +-ALTER TABLE test_io_shared SET TABLESPACE regress_tblspace; +--- SELECT from the table so that the data is read into shared buffers and +--- context 'normal', object 'relation' reads are counted. +-SELECT COUNT(*) FROM test_io_shared; +-COMMIT; +-SELECT pg_stat_force_next_flush(); +-SELECT sum(reads) AS io_sum_shared_after_reads +- FROM pg_stat_io WHERE context = 'normal' AND object = 'relation' \gset +-SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads; +- + SELECT sum(hits) AS io_sum_shared_before_hits + FROM pg_stat_io WHERE context = 'normal' AND object = 'relation' \gset + -- Select from the table again to count hits. +diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql +index 1b80d3687b..4d8798b0b1 100644 +--- a/src/test/regress/sql/stats_ext.sql ++++ b/src/test/regress/sql/stats_ext.sql +@@ -50,7 +50,7 @@ DROP TABLE ext_stats_test; + CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER); + CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1; + COMMENT ON STATISTICS ab1_a_b_stats IS 'new comment'; +-CREATE ROLE regress_stats_ext; ++CREATE ROLE regress_stats_ext PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION AUTHORIZATION regress_stats_ext; + COMMENT ON STATISTICS ab1_a_b_stats IS 'changed comment'; + DROP STATISTICS ab1_a_b_stats; +@@ -1607,7 +1607,7 @@ drop statistics stts_t1_expr_expr_stat; + set search_path to public, stts_s1; + \dX + +-create role regress_stats_ext nosuperuser; ++create role regress_stats_ext nosuperuser PASSWORD NEON_PASSWORD_PLACEHOLDER; + set role regress_stats_ext; + \dX + reset role; +@@ -1618,7 +1618,7 @@ drop user regress_stats_ext; + reset search_path; + + -- User with no access +-CREATE USER regress_stats_user1; ++CREATE USER regress_stats_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER; + GRANT USAGE ON SCHEMA tststats TO regress_stats_user1; + SET SESSION AUTHORIZATION regress_stats_user1; + SELECT * FROM tststats.priv_test_tbl; -- Permission denied +diff --git 
a/src/test/regress/sql/subscription.sql b/src/test/regress/sql/subscription.sql +index 444e563ff3..1a538a98a0 100644 +--- a/src/test/regress/sql/subscription.sql ++++ b/src/test/regress/sql/subscription.sql +@@ -2,10 +2,10 @@ + -- SUBSCRIPTION + -- + +-CREATE ROLE regress_subscription_user LOGIN SUPERUSER; +-CREATE ROLE regress_subscription_user2; +-CREATE ROLE regress_subscription_user3 IN ROLE pg_create_subscription; +-CREATE ROLE regress_subscription_user_dummy LOGIN NOSUPERUSER; ++CREATE ROLE regress_subscription_user LOGIN SUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_subscription_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE ROLE regress_subscription_user3 PASSWORD NEON_PASSWORD_PLACEHOLDER IN ROLE pg_create_subscription; ++CREATE ROLE regress_subscription_user_dummy LOGIN NOSUPERUSER PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET SESSION AUTHORIZATION 'regress_subscription_user'; + + -- fail - no publications +diff --git a/src/test/regress/sql/test_setup.sql b/src/test/regress/sql/test_setup.sql +index 1b2d434683..b765c748b8 100644 +--- a/src/test/regress/sql/test_setup.sql ++++ b/src/test/regress/sql/test_setup.sql +@@ -135,7 +135,8 @@ CREATE TABLE onek ( + ); + + \set filename :abs_srcdir '/data/onek.data' +-COPY onek FROM :'filename'; ++\set command '\\copy onek FROM ' :'filename'; ++:command + VACUUM ANALYZE onek; + + CREATE TABLE onek2 AS SELECT * FROM onek; +@@ -161,7 +162,8 @@ CREATE TABLE tenk1 ( + ); + + \set filename :abs_srcdir '/data/tenk.data' +-COPY tenk1 FROM :'filename'; ++\set command '\\copy tenk1 FROM ' :'filename'; ++:command + VACUUM ANALYZE tenk1; + + CREATE TABLE tenk2 AS SELECT * FROM tenk1; +@@ -174,7 +176,8 @@ CREATE TABLE person ( + ); + + \set filename :abs_srcdir '/data/person.data' +-COPY person FROM :'filename'; ++\set command '\\copy person FROM ' :'filename'; ++:command + VACUUM ANALYZE person; + + CREATE TABLE emp ( +@@ -183,7 +186,8 @@ CREATE TABLE emp ( + ) INHERITS (person); + + \set filename 
:abs_srcdir '/data/emp.data' +-COPY emp FROM :'filename'; ++\set command '\\copy emp FROM ' :'filename'; ++:command + VACUUM ANALYZE emp; + + CREATE TABLE student ( +@@ -191,7 +195,8 @@ CREATE TABLE student ( + ) INHERITS (person); + + \set filename :abs_srcdir '/data/student.data' +-COPY student FROM :'filename'; ++\set command '\\copy student FROM ' :'filename'; ++:command + VACUUM ANALYZE student; + + CREATE TABLE stud_emp ( +@@ -199,7 +204,8 @@ CREATE TABLE stud_emp ( + ) INHERITS (emp, student); + + \set filename :abs_srcdir '/data/stud_emp.data' +-COPY stud_emp FROM :'filename'; ++\set command '\\copy stud_emp FROM ' :'filename'; ++:command + VACUUM ANALYZE stud_emp; + + CREATE TABLE road ( +@@ -208,7 +214,8 @@ CREATE TABLE road ( + ); + + \set filename :abs_srcdir '/data/streets.data' +-COPY road FROM :'filename'; ++\set command '\\copy road FROM ' :'filename'; ++:command + VACUUM ANALYZE road; + + CREATE TABLE ihighway () INHERITS (road); +diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql +index fbd26cdba4..7ec2d78eee 100644 +--- a/src/test/regress/sql/tsearch.sql ++++ b/src/test/regress/sql/tsearch.sql +@@ -49,7 +49,8 @@ CREATE TABLE test_tsvector( + ); + + \set filename :abs_srcdir '/data/tsearch.data' +-COPY test_tsvector FROM :'filename'; ++\set command '\\copy test_tsvector FROM ' :'filename'; ++:command + + ANALYZE test_tsvector; + +diff --git a/src/test/regress/sql/updatable_views.sql b/src/test/regress/sql/updatable_views.sql +index 0a3176e25d..7744ef68f5 100644 +--- a/src/test/regress/sql/updatable_views.sql ++++ b/src/test/regress/sql/updatable_views.sql +@@ -425,9 +425,9 @@ DROP TABLE base_tbl CASCADE; + + -- permissions checks + +-CREATE USER regress_view_user1; +-CREATE USER regress_view_user2; +-CREATE USER regress_view_user3; ++CREATE USER regress_view_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_view_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++CREATE USER regress_view_user3 PASSWORD 
NEON_PASSWORD_PLACEHOLDER; + + SET SESSION AUTHORIZATION regress_view_user1; + CREATE TABLE base_tbl(a int, b text, c float); +@@ -1586,8 +1586,8 @@ drop view uv_iocu_view; + drop table uv_iocu_tab; + + -- ON CONFLICT DO UPDATE permissions checks +-create user regress_view_user1; +-create user regress_view_user2; ++create user regress_view_user1 PASSWORD NEON_PASSWORD_PLACEHOLDER; ++create user regress_view_user2 PASSWORD NEON_PASSWORD_PLACEHOLDER; + + set session authorization regress_view_user1; + create table base_tbl(a int unique, b text, c float); +diff --git a/src/test/regress/sql/update.sql b/src/test/regress/sql/update.sql +index 7a7bee77b9..07b480cd59 100644 +--- a/src/test/regress/sql/update.sql ++++ b/src/test/regress/sql/update.sql +@@ -339,7 +339,7 @@ DROP FUNCTION func_parted_mod_b(); + ----------------------------------------- + + ALTER TABLE range_parted ENABLE ROW LEVEL SECURITY; +-CREATE USER regress_range_parted_user; ++CREATE USER regress_range_parted_user PASSWORD NEON_PASSWORD_PLACEHOLDER; + GRANT ALL ON range_parted, mintab TO regress_range_parted_user; + CREATE POLICY seeall ON range_parted AS PERMISSIVE FOR SELECT USING (true); + CREATE POLICY policy_range_parted ON range_parted for UPDATE USING (true) WITH CHECK (c % 2 = 0); +diff --git a/src/test/regress/sql/vacuum.sql b/src/test/regress/sql/vacuum.sql +index ae36b54641..5612b8e162 100644 +--- a/src/test/regress/sql/vacuum.sql ++++ b/src/test/regress/sql/vacuum.sql +@@ -335,7 +335,7 @@ CREATE TABLE vacowned (a int); + CREATE TABLE vacowned_parted (a int) PARTITION BY LIST (a); + CREATE TABLE vacowned_part1 PARTITION OF vacowned_parted FOR VALUES IN (1); + CREATE TABLE vacowned_part2 PARTITION OF vacowned_parted FOR VALUES IN (2); +-CREATE ROLE regress_vacuum; ++CREATE ROLE regress_vacuum PASSWORD NEON_PASSWORD_PLACEHOLDER; + SET ROLE regress_vacuum; + -- Simple table + VACUUM vacowned; diff --git a/patches/pg_anon.patch b/compute/patches/pg_anon.patch similarity index 100% rename from 
patches/pg_anon.patch rename to compute/patches/pg_anon.patch diff --git a/patches/pg_cron.patch b/compute/patches/pg_cron.patch similarity index 100% rename from patches/pg_cron.patch rename to compute/patches/pg_cron.patch diff --git a/patches/pg_hint_plan.patch b/compute/patches/pg_hint_plan.patch similarity index 100% rename from patches/pg_hint_plan.patch rename to compute/patches/pg_hint_plan.patch diff --git a/patches/pgvector.patch b/compute/patches/pgvector.patch similarity index 100% rename from patches/pgvector.patch rename to compute/patches/pgvector.patch diff --git a/patches/rum.patch b/compute/patches/rum.patch similarity index 100% rename from patches/rum.patch rename to compute/patches/rum.patch diff --git a/compute/vm-image-spec.yaml b/compute/vm-image-spec.yaml new file mode 100644 index 0000000000..0af44745e5 --- /dev/null +++ b/compute/vm-image-spec.yaml @@ -0,0 +1,112 @@ +# Supplemental file for neondatabase/autoscaling's vm-builder, for producing the VM compute image. +--- +commands: + - name: cgconfigparser + user: root + sysvInitAction: sysinit + shell: 'cgconfigparser -l /etc/cgconfig.conf -s 1664' + # restrict permissions on /neonvm/bin/resize-swap, because we grant access to compute_ctl for + # running it as root. 
+ - name: chmod-resize-swap + user: root + sysvInitAction: sysinit + shell: 'chmod 711 /neonvm/bin/resize-swap' + - name: pgbouncer + user: postgres + sysvInitAction: respawn + shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini' + - name: postgres-exporter + user: nobody + sysvInitAction: respawn + shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter' + - name: sql-exporter + user: nobody + sysvInitAction: respawn + shell: '/bin/sql_exporter -config.file=/etc/sql_exporter.yml -web.listen-address=:9399' + - name: sql-exporter-autoscaling + user: nobody + sysvInitAction: respawn + shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499' +shutdownHook: | + su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10' +files: + - filename: compute_ctl-resize-swap + content: | + # Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap + # as root without requiring entering a password (NOPASSWD), regardless of hostname (ALL) + postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap + - filename: cgconfig.conf + content: | + # Configuration for cgroups in VM compute nodes + group neon-postgres { + perm { + admin { + uid = postgres; + } + task { + gid = users; + } + } + memory {} + } +build: | + # Build cgroup-tools + # + # At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically + # libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-monitor + # requires cgroup v2, so we'll build cgroup-tools ourselves. 
+ FROM debian:bullseye-slim as libcgroup-builder + ENV LIBCGROUP_VERSION=v2.0.3 + + RUN set -exu \ + && apt update \ + && apt install --no-install-recommends -y \ + git \ + ca-certificates \ + automake \ + cmake \ + make \ + gcc \ + byacc \ + flex \ + libtool \ + libpam0g-dev \ + && git clone --depth 1 -b $LIBCGROUP_VERSION https://github.com/libcgroup/libcgroup \ + && INSTALL_DIR="/libcgroup-install" \ + && mkdir -p "$INSTALL_DIR/bin" "$INSTALL_DIR/include" \ + && cd libcgroup \ + # extracted from bootstrap.sh, with modified flags: + && (test -d m4 || mkdir m4) \ + && autoreconf -fi \ + && rm -rf autom4te.cache \ + && CFLAGS="-O3" ./configure --prefix="$INSTALL_DIR" --sysconfdir=/etc --localstatedir=/var --enable-opaque-hierarchy="name=systemd" \ + # actually build the thing... + && make install +merge: | + # tweak nofile limits + RUN set -e \ + && echo 'fs.file-max = 1048576' >>/etc/sysctl.conf \ + && test ! -e /etc/security || ( \ + echo '* - nofile 1048576' >>/etc/security/limits.conf \ + && echo 'root - nofile 1048576' >>/etc/security/limits.conf \ + ) + + # Allow postgres user (compute_ctl) to run swap resizer. + # Need to install sudo in order to allow this. + # + # Also, remove the 'read' permission from group/other on /neonvm/bin/resize-swap, just to be safe. 
+ RUN set -e \ + && apt update \ + && apt install --no-install-recommends -y \ + sudo \ + && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + COPY compute_ctl-resize-swap /etc/sudoers.d/compute_ctl-resize-swap + + COPY cgconfig.conf /etc/cgconfig.conf + + RUN set -e \ + && chmod 0644 /etc/cgconfig.conf + + COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/ + COPY --from=libcgroup-builder /libcgroup-install/lib/* /usr/lib/ + COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/ diff --git a/control_plane/Cargo.toml b/control_plane/Cargo.toml index c185d20484..df87c181bf 100644 --- a/control_plane/Cargo.toml +++ b/control_plane/Cargo.toml @@ -9,7 +9,6 @@ anyhow.workspace = true camino.workspace = true clap.workspace = true comfy-table.workspace = true -git-version.workspace = true humantime.workspace = true nix.workspace = true once_cell.workspace = true diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index 33ca70af96..cae9416af6 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -17,9 +17,7 @@ use std::time::Duration; use anyhow::{bail, Context}; use camino::Utf8PathBuf; -use pageserver_api::models::{ - self, AuxFilePolicy, LocationConfig, TenantHistorySize, TenantInfo, TimelineInfo, -}; +use pageserver_api::models::{self, AuxFilePolicy, TenantInfo, TimelineInfo}; use pageserver_api::shard::TenantShardId; use pageserver_client::mgmt_api; use postgres_backend::AuthType; @@ -324,22 +322,6 @@ impl PageServerNode { background_process::stop_process(immediate, "pageserver", &self.pid_file()) } - pub async fn page_server_psql_client( - &self, - ) -> anyhow::Result<( - tokio_postgres::Client, - tokio_postgres::Connection, - )> { - let mut config = self.pg_connection_config.clone(); - if self.conf.pg_auth_type == AuthType::NeonJWT { - let token = self - .env - .generate_auth_token(&Claims::new(None, Scope::PageServerApi))?; - config = config.set_password(Some(token)); - } - 
Ok(config.connect_no_tls().await?) - } - pub async fn check_status(&self) -> mgmt_api::Result<()> { self.http_client.status().await } @@ -540,19 +522,6 @@ impl PageServerNode { Ok(()) } - pub async fn location_config( - &self, - tenant_shard_id: TenantShardId, - config: LocationConfig, - flush_ms: Option, - lazy: bool, - ) -> anyhow::Result<()> { - Ok(self - .http_client - .location_config(tenant_shard_id, config, flush_ms, lazy) - .await?) - } - pub async fn timeline_list( &self, tenant_shard_id: &TenantShardId, @@ -636,14 +605,4 @@ impl PageServerNode { Ok(()) } - - pub async fn tenant_synthetic_size( - &self, - tenant_shard_id: TenantShardId, - ) -> anyhow::Result { - Ok(self - .http_client - .tenant_synthetic_size(tenant_shard_id) - .await?) - } } diff --git a/control_plane/src/postgresql_conf.rs b/control_plane/src/postgresql_conf.rs index 638575eb82..5aee12dc97 100644 --- a/control_plane/src/postgresql_conf.rs +++ b/control_plane/src/postgresql_conf.rs @@ -4,13 +4,10 @@ /// NOTE: This doesn't implement the full, correct postgresql.conf syntax. Just /// enough to extract a few settings we need in Neon, assuming you don't do /// funny stuff like include-directives or funny escaping. 
-use anyhow::{bail, Context, Result}; use once_cell::sync::Lazy; use regex::Regex; use std::collections::HashMap; use std::fmt; -use std::io::BufRead; -use std::str::FromStr; /// In-memory representation of a postgresql.conf file #[derive(Default, Debug)] @@ -19,84 +16,16 @@ pub struct PostgresConf { hash: HashMap, } -static CONF_LINE_RE: Lazy = Lazy::new(|| Regex::new(r"^((?:\w|\.)+)\s*=\s*(\S+)$").unwrap()); - impl PostgresConf { pub fn new() -> PostgresConf { PostgresConf::default() } - /// Read file into memory - pub fn read(read: impl std::io::Read) -> Result { - let mut result = Self::new(); - - for line in std::io::BufReader::new(read).lines() { - let line = line?; - - // Store each line in a vector, in original format - result.lines.push(line.clone()); - - // Also parse each line and insert key=value lines into a hash map. - // - // FIXME: This doesn't match exactly the flex/bison grammar in PostgreSQL. - // But it's close enough for our usage. - let line = line.trim(); - if line.starts_with('#') { - // comment, ignore - continue; - } else if let Some(caps) = CONF_LINE_RE.captures(line) { - let name = caps.get(1).unwrap().as_str(); - let raw_val = caps.get(2).unwrap().as_str(); - - if let Ok(val) = deescape_str(raw_val) { - // Note: if there's already an entry in the hash map for - // this key, this will replace it. That's the behavior what - // we want; when PostgreSQL reads the file, each line - // overrides any previous value for the same setting. - result.hash.insert(name.to_string(), val.to_string()); - } - } - } - Ok(result) - } - /// Return the current value of 'option' pub fn get(&self, option: &str) -> Option<&str> { self.hash.get(option).map(|x| x.as_ref()) } - /// Return the current value of a field, parsed to the right datatype. - /// - /// This calls the FromStr::parse() function on the value of the field. If - /// the field does not exist, or parsing fails, returns an error. 
- /// - pub fn parse_field(&self, field_name: &str, context: &str) -> Result - where - T: FromStr, - ::Err: std::error::Error + Send + Sync + 'static, - { - self.get(field_name) - .with_context(|| format!("could not find '{}' option {}", field_name, context))? - .parse::() - .with_context(|| format!("could not parse '{}' option {}", field_name, context)) - } - - pub fn parse_field_optional(&self, field_name: &str, context: &str) -> Result> - where - T: FromStr, - ::Err: std::error::Error + Send + Sync + 'static, - { - if let Some(val) = self.get(field_name) { - let result = val - .parse::() - .with_context(|| format!("could not parse '{}' option {}", field_name, context))?; - - Ok(Some(result)) - } else { - Ok(None) - } - } - /// /// Note: if you call this multiple times for the same option, the config /// file will a line for each call. It would be nice to have a function @@ -154,48 +83,8 @@ fn escape_str(s: &str) -> String { } } -/// De-escape a possibly-quoted value. -/// -/// See `DeescapeQuotedString` function in PostgreSQL sources for how PostgreSQL -/// does this. -fn deescape_str(s: &str) -> Result { - // If the string has a quote at the beginning and end, strip them out. 
- if s.len() >= 2 && s.starts_with('\'') && s.ends_with('\'') { - let mut result = String::new(); - - let mut iter = s[1..(s.len() - 1)].chars().peekable(); - while let Some(c) = iter.next() { - let newc = if c == '\\' { - match iter.next() { - Some('b') => '\x08', - Some('f') => '\x0c', - Some('n') => '\n', - Some('r') => '\r', - Some('t') => '\t', - Some('0'..='7') => { - // TODO - bail!("octal escapes not supported"); - } - Some(n) => n, - None => break, - } - } else if c == '\'' && iter.peek() == Some(&'\'') { - // doubled quote becomes just one quote - iter.next().unwrap() - } else { - c - }; - - result.push(newc); - } - Ok(result) - } else { - Ok(s.to_string()) - } -} - #[test] -fn test_postgresql_conf_escapes() -> Result<()> { +fn test_postgresql_conf_escapes() -> anyhow::Result<()> { assert_eq!(escape_str("foo bar"), "'foo bar'"); // these don't need to be quoted assert_eq!(escape_str("foo"), "foo"); @@ -214,13 +103,5 @@ fn test_postgresql_conf_escapes() -> Result<()> { assert_eq!(escape_str("fo\\o"), "'fo\\\\o'"); assert_eq!(escape_str("10 cats"), "'10 cats'"); - // Test de-escaping - assert_eq!(deescape_str(&escape_str("foo"))?, "foo"); - assert_eq!(deescape_str(&escape_str("fo'o\nba\\r"))?, "fo'o\nba\\r"); - assert_eq!(deescape_str("'\\b\\f\\n\\r\\t'")?, "\x08\x0c\n\r\t"); - - // octal-escapes are currently not supported - assert!(deescape_str("'foo\\7\\07\\007'").is_err()); - Ok(()) } diff --git a/control_plane/src/storage_controller.rs b/control_plane/src/storage_controller.rs index 2b714fbfbf..0c0e67dff0 100644 --- a/control_plane/src/storage_controller.rs +++ b/control_plane/src/storage_controller.rs @@ -346,7 +346,14 @@ impl StorageController { let pg_log_path = pg_data_path.join("postgres.log"); if !tokio::fs::try_exists(&pg_data_path).await? 
{ - let initdb_args = ["-D", pg_data_path.as_ref(), "--username", &username()]; + let initdb_args = [ + "-D", + pg_data_path.as_ref(), + "--username", + &username(), + "--no-sync", + "--no-instructions", + ]; tracing::info!( "Initializing storage controller database with args: {:?}", initdb_args diff --git a/control_plane/storcon_cli/src/main.rs b/control_plane/storcon_cli/src/main.rs index 651fcda8db..73d89699ed 100644 --- a/control_plane/storcon_cli/src/main.rs +++ b/control_plane/storcon_cli/src/main.rs @@ -4,8 +4,8 @@ use std::{str::FromStr, time::Duration}; use clap::{Parser, Subcommand}; use pageserver_api::{ controller_api::{ - NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse, ShardSchedulingPolicy, - TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest, + AvailabilityZone, NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse, + ShardSchedulingPolicy, TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest, }, models::{ EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary, @@ -339,7 +339,7 @@ async fn main() -> anyhow::Result<()> { listen_pg_port, listen_http_addr, listen_http_port, - availability_zone_id, + availability_zone_id: AvailabilityZone(availability_zone_id), }), ) .await?; diff --git a/docker-compose/README.md b/docker-compose/README.md index bd47805a67..648e4ca030 100644 --- a/docker-compose/README.md +++ b/docker-compose/README.md @@ -2,8 +2,8 @@ # Example docker compose configuration The configuration in this directory is used for testing Neon docker images: it is -not intended for deploying a usable system. To run a development environment where -you can experiment with a minature Neon system, use `cargo neon` rather than container images. +not intended for deploying a usable system. To run a development environment where +you can experiment with a miniature Neon system, use `cargo neon` rather than container images. 
This configuration does not start the storage controller, because the controller needs a way to reconfigure running computes, and no such thing exists in this setup. diff --git a/docs/rfcs/038-independent-compute-release.md b/docs/rfcs/038-independent-compute-release.md new file mode 100644 index 0000000000..3deaf1e6fd --- /dev/null +++ b/docs/rfcs/038-independent-compute-release.md @@ -0,0 +1,343 @@ +# Independent compute release + +Created at: 2024-08-30. Author: Alexey Kondratov (@ololobus) + +## Summary + +This document proposes an approach to fully independent compute release flow. It attempts to +cover the following features: + +- Process is automated as much as possible to minimize human errors. +- Compute<->storage protocol compatibility is ensured. +- A transparent release history is available with an easy rollback strategy. +- Although not in the scope of this document, there is a viable way to extend the proposed release + flow to achieve the canary and/or blue-green deployment strategies. + +## Motivation + +Previously, the compute release was tightly coupled to the storage release. This meant that once +some storage nodes got restarted with a newer version, all new compute starts using these nodes +automatically got a new version. Thus, two releases happen in parallel, which increases the blast +radius and makes ownership fuzzy. + +Now, we practice a manual v0 independent compute release flow -- after getting a new compute release +image and tag, we pin it region by region using Admin UI. It's better, but it still has its own flaws: + +1. It's a simple but fairly manual process, as you need to click through a few pages. +2. It's prone to human errors, e.g., you could mistype or copy the wrong compute tag. +3. We now require an additional approval in the Admin UI, which partially solves the 2., + but also makes the whole process pretty annoying, as you constantly need to go back + and forth between two people. 
+ +## Non-goals + +It's not the goal of this document to propose a design for some general-purpose release tool like Helm. +The document considers how the current compute fleet is orchestrated at Neon. Even if we later +decide to split the control plane further (e.g., introduce a separate compute controller), the proposed +release process shouldn't change much, i.e., the releases table and API will reside in +one of the parts. + +Achieving the canary and/or blue-green deploy strategies is out of the scope of this document. They +were kept in mind, though, so it's expected that the proposed approach will lay down the foundation +for implementing them in future iterations. + +## Impacted components + +Compute, control plane, CI, observability (some Grafana dashboards may require changes). + +## Prior art + +One of the very close examples is how Helm tracks [releases history](https://helm.sh/docs/helm/helm_history/). + +In the code: + +- [Release](https://github.com/helm/helm/blob/2b30cf4b61d587d3f7594102bb202b787b9918db/pkg/release/release.go#L20-L43) +- [Release info](https://github.com/helm/helm/blob/2b30cf4b61d587d3f7594102bb202b787b9918db/pkg/release/info.go#L24-L40) +- [Release status](https://github.com/helm/helm/blob/2b30cf4b61d587d3f7594102bb202b787b9918db/pkg/release/status.go#L18-L42) + +TL;DR it has several important attributes: + +- Revision -- unique release ID/primary key. It is not the same as the application version, + because the same version can be deployed several times, e.g., after a newer version rollback. +- App version -- version of the application chart/code. +- Config -- set of overrides to the default config of the application. +- Status -- current status of the release in the history. +- Timestamps -- tracks when a release was created and deployed. + +## Proposed implementation + +### Separate release branch + +We will use a separate release branch, `release-compute`, to have a clean history for releases and commits. 
+In order to avoid confusion with storage releases, we will use a different prefix for compute [git release +tags](https://github.com/neondatabase/neon/releases) -- `release-compute-XXXX`. We will use the same tag for +Docker images as well. The `neondatabase/compute-node-v16:release-compute-XXXX` looks longer and a bit redundant, +but it's better to have image and git tags in sync. + +Currently, control plane relies on the numeric compute and storage release versions to decide on compute->storage +compatibility. Once we implement this proposal, we should drop this code as release numbers will be completely +independent. The only constraint we want is that it must monotonically increase within the same release branch. + +### Compute config/settings manifest + +We will create a new sub-directory `compute` and file `compute/manifest.yaml` with a structure: + +```yaml +pg_settings: + # Common settings for primaries and secondaries of all versions. + common: + wal_log_hints: "off" + max_wal_size: "1024" + + per_version: + 14: + # Common settings for both replica and primary of version PG 14 + common: + shared_preload_libraries: "neon,pg_stat_statements,extension_x" + 15: + common: + shared_preload_libraries: "neon,pg_stat_statements,extension_x" + # Settings that should be applied only to + replica: + # Available only starting Postgres 15th + recovery_prefetch: "off" + # ... + 17: + common: + # For example, if third-party `extension_x` is not yet available for PG 17 + shared_preload_libraries: "neon,pg_stat_statements" + replica: + recovery_prefetch: "off" +``` + +**N.B.** Setting value should be a string with `on|off` for booleans and a number (as a string) +without units for all numeric settings. That's how the control plane currently operates. + +The priority of settings will be (a higher number is a higher priority): + +1. Any static and hard-coded settings in the control plane +2. `pg_settings->common` +3. Per-version `common` +4. Per-version `replica` +5. 
Any per-user/project/endpoint overrides in the control plane +6. Any dynamic setting calculated based on the compute size + +**N.B.** For simplicity, we do not do any custom logic for `shared_preload_libraries`, so it's completely +overridden if specified on some level. Make sure that you include all necessary extensions in it when you +do any overrides. + +**N.B.** There is a tricky question about what to do with custom compute image pinning we sometimes +do for particular projects and customers. That's usually some ad-hoc work and images are based on +the latest compute image, so it's relatively safe to assume that we could use settings from the latest compute +release. If for some reason that's not true, and further overrides are needed, it's also possible to do +on the project level together with pinning the image, so it's on-call/engineer/support responsibility to +ensure that compute starts with the specified custom image. The only real risk is that compute image will get +stale and settings from new releases will drift away, so eventually it will get something incompatible, +but i) this is some operational issue, as we do not want stale images anyway, and ii) base settings +receive something really new so rarely that the chance of this happening is very low. If we want to solve it completely, +then together with pinning the image we could also pin the matching release revision in the control plane. + +The compute team will own the content of `compute/manifest.yaml`. + +### Control plane: releases table + +In order to store information about releases, the control plane will use a table `compute_releases` with the following +schema: + +```sql +CREATE TABLE compute_releases ( + -- Unique release ID + -- N.B. Revision won't be synchronized across all regions, because all control planes are technically independent + -- services. 
We have the same situation with Helm releases as well because they could be deployed and rolled back + -- independently in different clusters. + revision BIGSERIAL PRIMARY KEY, + -- Numeric version of the compute image, e.g. 9057 + version BIGINT NOT NULL, + -- Compute image tag, e.g. `release-9057` + tag TEXT NOT NULL, + -- Current release status. Currently, it will be a simple enum + -- * `deployed` -- release is deployed and used for new compute starts. + -- Exactly one release can have this status at a time. + -- * `superseded` -- release has been replaced by a newer one. + -- But we can always extend it in the future when we need more statuses + -- for more complex deployment strategies. + status TEXT NOT NULL, + -- Any additional metadata for compute in the corresponding release + manifest JSONB NOT NULL, + -- Timestamp when release record was created in the control plane database + created_at TIMESTAMP NOT NULL DEFAULT now(), + -- Timestamp when release deployment was finished + deployed_at TIMESTAMP +); +``` + +We keep track of the old releases not only for the sake of audit, but also because we usually have ~30% of +old computes started using the image from one of the previous releases. Yet, when users want to reconfigure +them without restarting, the control plane needs to know what settings are applicable to them, so we also need +information about the previous releases that are readily available. There could be some other auxiliary info +needed as well: supported extensions, compute flags, etc. + +**N.B.** Here, we can end up in an ambiguous situation when the same compute image is deployed twice, e.g., +it was deployed once, then rolled back, and then deployed again, potentially with a different manifest. Yet, +we could've started some computes with the first deployment and some with the second. 
Thus, when we need to +look up the manifest for the compute by its image tag, we will see two records in the table with the same tag, +but different revision numbers. We can assume that this could happen only in case of rollbacks, so we +can just take the latest revision for the given tag. + +### Control plane: management API + +The control plane will implement new API methods to manage releases: + +1. `POST /management/api/v2/compute_releases` to create a new release. With payload + + ```json + { + "version": 9057, + "tag": "release-9057", + "manifest": {} + } + ``` + + and response + + ```json + { + "revision": 53, + "version": 9057, + "tag": "release-9057", + "status": "deployed", + "manifest": {}, + "created_at": "2024-08-15T15:52:01.0000Z", + "deployed_at": "2024-08-15T15:52:01.0000Z" + } + ``` + + Here, we can actually mix-in custom (remote) extensions metadata into the `manifest`, so that the control plane + will get information about all available extensions not bundled into compute image. The corresponding + workflow in `neondatabase/build-custom-extensions` should produce it as an artifact and make + it accessible to the workflow in the `neondatabase/infra`. See the complete release flow below. Doing that, + we put a constraint that new custom extension requires new compute release, which is good for the safety, + but is not exactly what we want operational-wise (we want to be able to deploy new extensions without new + images). Yet, it can be solved incrementally: v0 -- do not do anything with extensions at all; + v1 -- put them into the same manifest; v2 -- make them separate entities with their own lifecycle. + + **N.B.** This method is intended to be used in CI workflows, and CI/network can be flaky. It's reasonable + to assume that we could retry the request several times, even though it's already succeeded. 
Although it's + not a big deal to create several identical releases one-by-one, it's better to avoid it, so the control plane + should check if the latest release is identical and just return `304 Not Modified` in this case. + +2. `POST /management/api/v2/compute_releases/rollback` to rollback to any previously deployed release. With payload + including the revision of the release to rollback to: + + ```json + { + "revision": 52 + } + ``` + + Rollback marks the current release as `superseded` and creates a new release with all the same data as the + requested revision, but with a new revision number. + + This rollback API is not strictly needed, as we can just use `infra` repo workflow to deploy any + available tag. It's still nice to have for on-call and any urgent matters, for example, if we need + to rollback and GitHub is down. It's much easier to specify only the revision number vs. crafting + all the necessary data for the new release payload. + +### Compute->storage compatibility tests + +In order to safely release new compute versions independently from storage, we need to ensure that the currently +deployed storage is compatible with the new compute version. Currently, we maintain backward compatibility +in storage, but newer computes may require a newer storage version. + +Remote end-to-end (e2e) tests [already accept](https://github.com/neondatabase/cloud/blob/e3468d433e0d73d02b7d7e738d027f509b522408/.github/workflows/testing.yml#L43-L48) +`storage_image_tag` and `compute_image_tag` as separate inputs. That means that we could reuse e2e tests to ensure +compatibility between storage and compute: + +1. Pick the latest storage release tag and use it as `storage_image_tag`. +2. Pick a new compute tag built in the current compute release PR and use it as `compute_image_tag`. + Here, we should use a temporary ECR image tag, because the final tag will be known only after the release PR is merged. +3. Trigger e2e tests as usual. 
+ +### Release flow + +```mermaid + sequenceDiagram + + actor oncall as Compute on-call person + participant neon as neondatabase/neon + + box private + participant cloud as neondatabase/cloud + participant exts as neondatabase/build-custom-extensions + participant infra as neondatabase/infra + end + + box cloud + participant preprod as Pre-prod control plane + participant prod as Production control plane + participant k8s as Compute k8s + end + + oncall ->> neon: Open release PR into release-compute + + activate neon + neon ->> cloud: CI: trigger e2e compatibility tests + activate cloud + cloud -->> neon: CI: e2e tests pass + deactivate cloud + neon ->> neon: CI: pass PR checks, get approvals + deactivate neon + + oncall ->> neon: Merge release PR into release-compute + + activate neon + neon ->> neon: CI: pass checks, build and push images + neon ->> exts: CI: trigger extensions build + activate exts + exts -->> neon: CI: extensions are ready + deactivate exts + neon ->> neon: CI: create release tag + neon ->> infra: Trigger release workflow using the produced tag + deactivate neon + + activate infra + infra ->> infra: CI: pass checks + infra ->> preprod: Release new compute image to pre-prod automatically
POST /management/api/v2/compute_releases + activate preprod + preprod -->> infra: 200 OK + deactivate preprod + + infra ->> infra: CI: wait for per-region production deploy approvals + oncall ->> infra: CI: approve deploys region by region + infra ->> k8s: Prewarm new compute image + infra ->> prod: POST /management/api/v2/compute_releases + activate prod + prod -->> infra: 200 OK + deactivate prod + deactivate infra +``` + +## Further work + +As briefly mentioned in other sections, eventually, we would like to use more complex deployment strategies. +For example, we can pass a fraction of the total compute starts that should use the new release. Then we can +mark the release as `partial` or `canary` and monitor its performance. If everything is fine, we can promote it +to `deployed` status. If not, we can roll back to the previous one. + +## Alternatives + +In theory, we can try using Helm as-is: + +1. Write a compute Helm chart. That will actually have only some config map, which the control plane can access and read. + N.B. We could reuse the control plane chart as well, but then it's not a fully independent release again and even more fuzzy. +2. The control plane will read it and start using the new compute version for new starts. + +Drawbacks: + +1. Helm releases work best if the workload is controlled by the Helm chart itself. Then you can have different + deployment strategies like rolling update or canary or blue/green deployments. At Neon, the compute starts are controlled + by control plane, so it makes it much more tricky. +2. Releases visibility will suffer, i.e. instead of a nice table in the control plane and Admin UI, we would need to use + `helm` cli and/or K8s UIs like K8sLens. +3. We do not restart all computes shortly after the new version release. This means that for some features and compatibility + purpose (see above) control plane may need some auxiliary info from the previous releases. 
diff --git a/libs/consumption_metrics/src/lib.rs b/libs/consumption_metrics/src/lib.rs index 810196aff6..fbe2e6830f 100644 --- a/libs/consumption_metrics/src/lib.rs +++ b/libs/consumption_metrics/src/lib.rs @@ -5,7 +5,7 @@ use chrono::{DateTime, Utc}; use rand::Rng; use serde::{Deserialize, Serialize}; -#[derive(Serialize, serde::Deserialize, Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)] +#[derive(Serialize, Deserialize, Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)] #[serde(tag = "type")] pub enum EventType { #[serde(rename = "absolute")] @@ -107,7 +107,7 @@ pub const CHUNK_SIZE: usize = 1000; // Just a wrapper around a slice of events // to serialize it as `{"events" : [ ] } -#[derive(serde::Serialize, serde::Deserialize)] +#[derive(serde::Serialize, Deserialize)] pub struct EventChunk<'a, T: Clone> { pub events: std::borrow::Cow<'a, [T]>, } diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs index 1194ee93ef..95310fdbac 100644 --- a/libs/pageserver_api/src/config.rs +++ b/libs/pageserver_api/src/config.rs @@ -104,9 +104,6 @@ pub struct ConfigToml { pub image_compression: ImageCompressionAlgorithm, pub ephemeral_bytes_per_memory_kb: usize, pub l0_flush: Option, - #[serde(skip_serializing)] - // TODO(https://github.com/neondatabase/neon/issues/8184): remove after this field is removed from all pageserver.toml's - pub compact_level0_phase1_value_access: serde::de::IgnoredAny, pub virtual_file_direct_io: crate::models::virtual_file::DirectIoMode, pub io_buffer_alignment: usize, } @@ -173,40 +170,6 @@ impl Default for EvictionOrder { } } -#[derive( - Eq, - PartialEq, - Debug, - Copy, - Clone, - strum_macros::EnumString, - strum_macros::Display, - serde_with::DeserializeFromStr, - serde_with::SerializeDisplay, -)] -#[strum(serialize_all = "kebab-case")] -pub enum GetVectoredImpl { - Sequential, - Vectored, -} - -#[derive( - Eq, - PartialEq, - Debug, - Copy, - Clone, - strum_macros::EnumString, - strum_macros::Display, - 
serde_with::DeserializeFromStr, - serde_with::SerializeDisplay, -)] -#[strum(serialize_all = "kebab-case")] -pub enum GetImpl { - Legacy, - Vectored, -} - #[derive(Copy, Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)] #[serde(transparent)] pub struct MaxVectoredReadBytes(pub NonZeroUsize); @@ -338,8 +301,6 @@ pub mod defaults { pub const DEFAULT_IMAGE_COMPRESSION: ImageCompressionAlgorithm = ImageCompressionAlgorithm::Zstd { level: Some(1) }; - pub const DEFAULT_VALIDATE_VECTORED_GET: bool = false; - pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0; pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512; @@ -376,7 +337,10 @@ impl Default for ConfigToml { concurrent_tenant_warmup: (NonZeroUsize::new(DEFAULT_CONCURRENT_TENANT_WARMUP) .expect("Invalid default constant")), - concurrent_tenant_size_logical_size_queries: NonZeroUsize::new(1).unwrap(), + concurrent_tenant_size_logical_size_queries: NonZeroUsize::new( + DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES, + ) + .unwrap(), metric_collection_interval: (humantime::parse_duration( DEFAULT_METRIC_COLLECTION_INTERVAL, ) @@ -417,7 +381,6 @@ impl Default for ConfigToml { image_compression: (DEFAULT_IMAGE_COMPRESSION), ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB), l0_flush: None, - compact_level0_phase1_value_access: Default::default(), virtual_file_direct_io: crate::models::virtual_file::DirectIoMode::default(), io_buffer_alignment: DEFAULT_IO_BUFFER_ALIGNMENT, @@ -467,8 +430,6 @@ pub mod tenant_conf_defaults { // By default ingest enough WAL for two new L0 layers before checking if new image // image layers should be created. 
pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2; - - pub const DEFAULT_INGEST_BATCH_SIZE: u64 = 100; } impl Default for TenantConfigToml { diff --git a/libs/pageserver_api/src/controller_api.rs b/libs/pageserver_api/src/controller_api.rs index 40b7dbbbc2..0ea30ce54f 100644 --- a/libs/pageserver_api/src/controller_api.rs +++ b/libs/pageserver_api/src/controller_api.rs @@ -1,4 +1,5 @@ use std::collections::{HashMap, HashSet}; +use std::fmt::Display; use std::str::FromStr; use std::time::{Duration, Instant}; @@ -57,7 +58,7 @@ pub struct NodeRegisterRequest { pub listen_http_addr: String, pub listen_http_port: u16, - pub availability_zone_id: String, + pub availability_zone_id: AvailabilityZone, } #[derive(Serialize, Deserialize)] @@ -74,10 +75,19 @@ pub struct TenantPolicyRequest { pub scheduling: Option, } +#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub struct AvailabilityZone(pub String); + +impl Display for AvailabilityZone { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + #[derive(Serialize, Deserialize)] pub struct ShardsPreferredAzsRequest { #[serde(flatten)] - pub preferred_az_ids: HashMap, + pub preferred_az_ids: HashMap, } #[derive(Serialize, Deserialize)] diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 45e84baa1f..45abda0ad8 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -37,14 +37,11 @@ use bytes::{Buf, BufMut, Bytes, BytesMut}; /// ```mermaid /// stateDiagram-v2 /// -/// [*] --> Loading: spawn_load() /// [*] --> Attaching: spawn_attach() /// -/// Loading --> Activating: activate() /// Attaching --> Activating: activate() /// Activating --> Active: infallible /// -/// Loading --> Broken: load() failure /// Attaching --> Broken: attach() failure /// /// Active --> Stopping: set_stopping(), part of shutdown & detach @@ -68,10 +65,6 @@ use bytes::{Buf, BufMut, Bytes, BytesMut}; )] 
#[serde(tag = "slug", content = "data")] pub enum TenantState { - /// This tenant is being loaded from local disk. - /// - /// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass. - Loading, /// This tenant is being attached to the pageserver. /// /// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass. @@ -121,8 +114,6 @@ impl TenantState { // But, our attach task might still be fetching the remote timelines, etc. // So, return `Maybe` while Attaching, making Console wait for the attach task to finish. Self::Attaching | Self::Activating(ActivatingFrom::Attaching) => Maybe, - // tenant mgr startup distinguishes attaching from loading via marker file. - Self::Loading | Self::Activating(ActivatingFrom::Loading) => Attached, // We only reach Active after successful load / attach. // So, call atttachment status Attached. Self::Active => Attached, @@ -191,10 +182,11 @@ impl LsnLease { } /// The only [`TenantState`] variants we could be `TenantState::Activating` from. +/// +/// XXX: We used to have more variants here, but now it's just one, which makes this rather +/// useless. Remove, once we've checked that there's no client code left that looks at this. 
#[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)] pub enum ActivatingFrom { - /// Arrived to [`TenantState::Activating`] from [`TenantState::Loading`] - Loading, /// Arrived to [`TenantState::Activating`] from [`TenantState::Attaching`] Attaching, } @@ -495,7 +487,7 @@ pub struct CompactionAlgorithmSettings { pub kind: CompactionAlgorithm, } -#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] +#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] #[serde(tag = "mode", rename_all = "kebab-case", deny_unknown_fields)] pub enum L0FlushConfig { #[serde(rename_all = "snake_case")] @@ -1562,11 +1554,8 @@ mod tests { #[test] fn tenantstatus_activating_serde() { - let states = [ - TenantState::Activating(ActivatingFrom::Loading), - TenantState::Activating(ActivatingFrom::Attaching), - ]; - let expected = "[{\"slug\":\"Activating\",\"data\":\"Loading\"},{\"slug\":\"Activating\",\"data\":\"Attaching\"}]"; + let states = [TenantState::Activating(ActivatingFrom::Attaching)]; + let expected = "[{\"slug\":\"Activating\",\"data\":\"Attaching\"}]"; let actual = serde_json::to_string(&states).unwrap(); @@ -1581,13 +1570,7 @@ mod tests { fn tenantstatus_activating_strum() { // tests added, because we use these for metrics let examples = [ - (line!(), TenantState::Loading, "Loading"), (line!(), TenantState::Attaching, "Attaching"), - ( - line!(), - TenantState::Activating(ActivatingFrom::Loading), - "Activating", - ), ( line!(), TenantState::Activating(ActivatingFrom::Attaching), diff --git a/libs/postgres_backend/src/lib.rs b/libs/postgres_backend/src/lib.rs index 8ea4b93fb1..e274d24585 100644 --- a/libs/postgres_backend/src/lib.rs +++ b/libs/postgres_backend/src/lib.rs @@ -280,16 +280,6 @@ pub struct PostgresBackend { pub type PostgresBackendTCP = PostgresBackend; -pub fn query_from_cstring(query_string: Bytes) -> Vec { - let mut query_string = query_string.to_vec(); - if let Some(ch) = query_string.last() { - if 
*ch == 0 { - query_string.pop(); - } - } - query_string -} - /// Cast a byte slice to a string slice, dropping null terminator if there's one. fn cstr_to_str(bytes: &[u8]) -> anyhow::Result<&str> { let without_null = bytes.strip_suffix(&[0]).unwrap_or(bytes); diff --git a/libs/postgres_ffi/src/pg_constants.rs b/libs/postgres_ffi/src/pg_constants.rs index 61b49a634d..497d011d7a 100644 --- a/libs/postgres_ffi/src/pg_constants.rs +++ b/libs/postgres_ffi/src/pg_constants.rs @@ -9,8 +9,8 @@ //! comments on them. //! +use crate::PageHeaderData; use crate::BLCKSZ; -use crate::{PageHeaderData, XLogRecord}; // // From pg_tablespace_d.h @@ -194,8 +194,6 @@ pub const XLR_RMGR_INFO_MASK: u8 = 0xF0; pub const XLOG_TBLSPC_CREATE: u8 = 0x00; pub const XLOG_TBLSPC_DROP: u8 = 0x10; -pub const SIZEOF_XLOGRECORD: u32 = size_of::() as u32; - // // from xlogrecord.h // @@ -219,8 +217,6 @@ pub const BKPIMAGE_HAS_HOLE: u8 = 0x01; /* page image has "hole" */ /* From transam.h */ pub const FIRST_NORMAL_TRANSACTION_ID: u32 = 3; pub const INVALID_TRANSACTION_ID: u32 = 0; -pub const FIRST_BOOTSTRAP_OBJECT_ID: u32 = 12000; -pub const FIRST_NORMAL_OBJECT_ID: u32 = 16384; /* pg_control.h */ pub const XLOG_CHECKPOINT_SHUTDOWN: u8 = 0x00; diff --git a/libs/postgres_ffi/src/xlog_utils.rs b/libs/postgres_ffi/src/xlog_utils.rs index 1873734753..a636bd2a97 100644 --- a/libs/postgres_ffi/src/xlog_utils.rs +++ b/libs/postgres_ffi/src/xlog_utils.rs @@ -26,6 +26,7 @@ use bytes::{Buf, Bytes}; use log::*; use serde::Serialize; +use std::ffi::OsStr; use std::fs::File; use std::io::prelude::*; use std::io::ErrorKind; @@ -78,19 +79,34 @@ pub fn XLogFileName(tli: TimeLineID, logSegNo: XLogSegNo, wal_segsz_bytes: usize ) } -pub fn XLogFromFileName(fname: &str, wal_seg_size: usize) -> (XLogSegNo, TimeLineID) { - let tli = u32::from_str_radix(&fname[0..8], 16).unwrap(); - let log = u32::from_str_radix(&fname[8..16], 16).unwrap() as XLogSegNo; - let seg = u32::from_str_radix(&fname[16..24], 16).unwrap() as 
XLogSegNo; - (log * XLogSegmentsPerXLogId(wal_seg_size) + seg, tli) +pub fn XLogFromFileName( + fname: &OsStr, + wal_seg_size: usize, +) -> anyhow::Result<(XLogSegNo, TimeLineID)> { + if let Some(fname_str) = fname.to_str() { + let tli = u32::from_str_radix(&fname_str[0..8], 16)?; + let log = u32::from_str_radix(&fname_str[8..16], 16)? as XLogSegNo; + let seg = u32::from_str_radix(&fname_str[16..24], 16)? as XLogSegNo; + Ok((log * XLogSegmentsPerXLogId(wal_seg_size) + seg, tli)) + } else { + anyhow::bail!("non-ut8 filename: {:?}", fname); + } } -pub fn IsXLogFileName(fname: &str) -> bool { - return fname.len() == XLOG_FNAME_LEN && fname.chars().all(|c| c.is_ascii_hexdigit()); +pub fn IsXLogFileName(fname: &OsStr) -> bool { + if let Some(fname) = fname.to_str() { + fname.len() == XLOG_FNAME_LEN && fname.chars().all(|c| c.is_ascii_hexdigit()) + } else { + false + } } -pub fn IsPartialXLogFileName(fname: &str) -> bool { - fname.ends_with(".partial") && IsXLogFileName(&fname[0..fname.len() - 8]) +pub fn IsPartialXLogFileName(fname: &OsStr) -> bool { + if let Some(fname) = fname.to_str() { + fname.ends_with(".partial") && IsXLogFileName(OsStr::new(&fname[0..fname.len() - 8])) + } else { + false + } } /// If LSN points to the beginning of the page, then shift it to first record, diff --git a/libs/postgres_ffi/wal_craft/src/lib.rs b/libs/postgres_ffi/wal_craft/src/lib.rs index 949e3f4251..5c0abda522 100644 --- a/libs/postgres_ffi/wal_craft/src/lib.rs +++ b/libs/postgres_ffi/wal_craft/src/lib.rs @@ -7,6 +7,7 @@ use postgres_ffi::{WAL_SEGMENT_SIZE, XLOG_BLCKSZ}; use postgres_ffi::{ XLOG_SIZE_OF_XLOG_LONG_PHD, XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD, }; +use std::ffi::OsStr; use std::path::{Path, PathBuf}; use std::process::Command; use std::time::{Duration, Instant}; @@ -26,7 +27,6 @@ macro_rules! xlog_utils_test { postgres_ffi::for_all_postgres_versions! 
{ xlog_utils_test } -#[derive(Debug, Clone, PartialEq, Eq)] pub struct Conf { pub pg_version: u32, pub pg_distrib_dir: PathBuf, @@ -136,8 +136,8 @@ impl Conf { pub fn pg_waldump( &self, - first_segment_name: &str, - last_segment_name: &str, + first_segment_name: &OsStr, + last_segment_name: &OsStr, ) -> anyhow::Result { let first_segment_file = self.datadir.join(first_segment_name); let last_segment_file = self.datadir.join(last_segment_name); diff --git a/libs/postgres_ffi/wal_craft/src/xlog_utils_test.rs b/libs/postgres_ffi/wal_craft/src/xlog_utils_test.rs index 79d45de67a..9eb3f0e95a 100644 --- a/libs/postgres_ffi/wal_craft/src/xlog_utils_test.rs +++ b/libs/postgres_ffi/wal_craft/src/xlog_utils_test.rs @@ -4,6 +4,7 @@ use super::*; use crate::{error, info}; use regex::Regex; use std::cmp::min; +use std::ffi::OsStr; use std::fs::{self, File}; use std::io::Write; use std::{env, str::FromStr}; @@ -54,7 +55,7 @@ fn test_end_of_wal(test_name: &str) { .wal_dir() .read_dir() .unwrap() - .map(|f| f.unwrap().file_name().into_string().unwrap()) + .map(|f| f.unwrap().file_name()) .filter(|fname| IsXLogFileName(fname)) .max() .unwrap(); @@ -70,11 +71,11 @@ fn test_end_of_wal(test_name: &str) { start_lsn ); for file in fs::read_dir(cfg.wal_dir()).unwrap().flatten() { - let fname = file.file_name().into_string().unwrap(); + let fname = file.file_name(); if !IsXLogFileName(&fname) { continue; } - let (segno, _) = XLogFromFileName(&fname, WAL_SEGMENT_SIZE); + let (segno, _) = XLogFromFileName(&fname, WAL_SEGMENT_SIZE).unwrap(); let seg_start_lsn = XLogSegNoOffsetToRecPtr(segno, 0, WAL_SEGMENT_SIZE); if seg_start_lsn > u64::from(*start_lsn) { continue; @@ -93,10 +94,10 @@ fn test_end_of_wal(test_name: &str) { } } -fn find_pg_waldump_end_of_wal(cfg: &crate::Conf, last_segment: &str) -> Lsn { +fn find_pg_waldump_end_of_wal(cfg: &crate::Conf, last_segment: &OsStr) -> Lsn { // Get the actual end of WAL by pg_waldump let waldump_output = cfg - .pg_waldump("000000010000000000000001", 
last_segment) + .pg_waldump(OsStr::new("000000010000000000000001"), last_segment) .unwrap() .stderr; let waldump_output = std::str::from_utf8(&waldump_output).unwrap(); @@ -117,7 +118,7 @@ fn find_pg_waldump_end_of_wal(cfg: &crate::Conf, last_segment: &str) -> Lsn { fn check_end_of_wal( cfg: &crate::Conf, - last_segment: &str, + last_segment: &OsStr, start_lsn: Lsn, expected_end_of_wal: Lsn, ) { @@ -132,7 +133,8 @@ fn check_end_of_wal( // Rename file to partial to actually find last valid lsn, then rename it back. fs::rename( cfg.wal_dir().join(last_segment), - cfg.wal_dir().join(format!("{}.partial", last_segment)), + cfg.wal_dir() + .join(format!("{}.partial", last_segment.to_str().unwrap())), ) .unwrap(); let wal_end = find_end_of_wal(&cfg.wal_dir(), WAL_SEGMENT_SIZE, start_lsn).unwrap(); @@ -142,7 +144,8 @@ fn check_end_of_wal( ); assert_eq!(wal_end, expected_end_of_wal); fs::rename( - cfg.wal_dir().join(format!("{}.partial", last_segment)), + cfg.wal_dir() + .join(format!("{}.partial", last_segment.to_str().unwrap())), cfg.wal_dir().join(last_segment), ) .unwrap(); diff --git a/libs/remote_storage/src/lib.rs b/libs/remote_storage/src/lib.rs index b5b69c9faf..45267ccda9 100644 --- a/libs/remote_storage/src/lib.rs +++ b/libs/remote_storage/src/lib.rs @@ -127,10 +127,6 @@ impl RemotePath { &self.0 } - pub fn extension(&self) -> Option<&str> { - self.0.extension() - } - pub fn strip_prefix(&self, p: &RemotePath) -> Result<&Utf8Path, std::path::StripPrefixError> { self.0.strip_prefix(&p.0) } diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml index f199b15554..7d284a6fc5 100644 --- a/libs/utils/Cargo.toml +++ b/libs/utils/Cargo.toml @@ -19,6 +19,7 @@ bincode.workspace = true bytes.workspace = true camino.workspace = true chrono.workspace = true +git-version.workspace = true hex = { workspace = true, features = ["serde"] } humantime.workspace = true hyper = { workspace = true, features = ["full"] } diff --git a/libs/utils/src/http/error.rs 
b/libs/utils/src/http/error.rs index 3d863a6518..5e05e4e713 100644 --- a/libs/utils/src/http/error.rs +++ b/libs/utils/src/http/error.rs @@ -82,7 +82,7 @@ impl ApiError { StatusCode::INTERNAL_SERVER_ERROR, ), ApiError::InternalServerError(err) => HttpErrorBody::response_from_msg_and_status( - err.to_string(), + format!("{err:#}"), // use alternative formatting so that we give the cause without backtrace StatusCode::INTERNAL_SERVER_ERROR, ), } diff --git a/libs/utils/src/leaky_bucket.rs b/libs/utils/src/leaky_bucket.rs index a120dc0ac5..0cc58738c0 100644 --- a/libs/utils/src/leaky_bucket.rs +++ b/libs/utils/src/leaky_bucket.rs @@ -21,7 +21,13 @@ //! //! Another explaination can be found here: -use std::{sync::Mutex, time::Duration}; +use std::{ + sync::{ + atomic::{AtomicU64, Ordering}, + Mutex, + }, + time::Duration, +}; use tokio::{sync::Notify, time::Instant}; @@ -128,6 +134,7 @@ impl LeakyBucketState { pub struct RateLimiter { pub config: LeakyBucketConfig, + pub sleep_counter: AtomicU64, pub state: Mutex, /// a queue to provide this fair ordering. pub queue: Notify, @@ -144,6 +151,7 @@ impl Drop for Requeue<'_> { impl RateLimiter { pub fn with_initial_tokens(config: LeakyBucketConfig, initial_tokens: f64) -> Self { RateLimiter { + sleep_counter: AtomicU64::new(0), state: Mutex::new(LeakyBucketState::with_initial_tokens( &config, initial_tokens, @@ -163,15 +171,16 @@ impl RateLimiter { /// returns true if we did throttle pub async fn acquire(&self, count: usize) -> bool { - let mut throttled = false; - let start = tokio::time::Instant::now(); + let start_count = self.sleep_counter.load(Ordering::Acquire); + let mut end_count = start_count; + // wait until we are the first in the queue let mut notified = std::pin::pin!(self.queue.notified()); if !notified.as_mut().enable() { - throttled = true; notified.await; + end_count = self.sleep_counter.load(Ordering::Acquire); } // notify the next waiter in the queue when we are done. 
@@ -184,9 +193,22 @@ impl RateLimiter { .unwrap() .add_tokens(&self.config, start, count as f64); match res { - Ok(()) => return throttled, + Ok(()) => return end_count > start_count, Err(ready_at) => { - throttled = true; + struct Increment<'a>(&'a AtomicU64); + + impl Drop for Increment<'_> { + fn drop(&mut self) { + self.0.fetch_add(1, Ordering::AcqRel); + } + } + + // increment the counter after we finish sleeping (or cancel this task). + // this ensures that tasks that have already started the acquire will observe + // the new sleep count when they are allowed to resume on the notify. + let _inc = Increment(&self.sleep_counter); + end_count += 1; + tokio::time::sleep_until(ready_at).await; } } diff --git a/libs/utils/src/lib.rs b/libs/utils/src/lib.rs index 03fb36caf8..aacc1e1dd5 100644 --- a/libs/utils/src/lib.rs +++ b/libs/utils/src/lib.rs @@ -92,6 +92,10 @@ pub mod toml_edit_ext; pub mod circuit_breaker; +// Re-export used in macro. Avoids adding git-version as dep in target crates. +#[doc(hidden)] +pub use git_version; + /// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages /// /// we have several cases: @@ -131,7 +135,7 @@ macro_rules! project_git_version { ($const_identifier:ident) => { // this should try GIT_VERSION first only then git_version::git_version! const $const_identifier: &::core::primitive::str = { - const __COMMIT_FROM_GIT: &::core::primitive::str = git_version::git_version! { + const __COMMIT_FROM_GIT: &::core::primitive::str = $crate::git_version::git_version! { prefix = "", fallback = "unknown", args = ["--abbrev=40", "--always", "--dirty=-modified"] // always use full sha diff --git a/libs/utils/src/vec_map.rs b/libs/utils/src/vec_map.rs index 5f0028bacd..1fe048c6f0 100644 --- a/libs/utils/src/vec_map.rs +++ b/libs/utils/src/vec_map.rs @@ -120,32 +120,6 @@ impl VecMap { Ok((None, delta_size)) } - /// Split the map into two. 
- /// - /// The left map contains everything before `cutoff` (exclusive). - /// Right map contains `cutoff` and everything after (inclusive). - pub fn split_at(&self, cutoff: &K) -> (Self, Self) - where - K: Clone, - V: Clone, - { - let split_idx = self - .data - .binary_search_by_key(&cutoff, extract_key) - .unwrap_or_else(std::convert::identity); - - ( - VecMap { - data: self.data[..split_idx].to_vec(), - ordering: self.ordering, - }, - VecMap { - data: self.data[split_idx..].to_vec(), - ordering: self.ordering, - }, - ) - } - /// Move items from `other` to the end of `self`, leaving `other` empty. /// If the `other` ordering is different from `self` ordering /// `ExtendOrderingError` error will be returned. diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index 0eb48d6823..f1fc3a86fe 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -27,7 +27,6 @@ crc32c.workspace = true either.workspace = true fail.workspace = true futures.workspace = true -git-version.workspace = true hex.workspace = true humantime.workspace = true humantime-serde.workspace = true diff --git a/pageserver/benches/bench_walredo.rs b/pageserver/benches/bench_walredo.rs index edc09d0bf2..45936cb3fa 100644 --- a/pageserver/benches/bench_walredo.rs +++ b/pageserver/benches/bench_walredo.rs @@ -1,7 +1,7 @@ //! Quantify a single walredo manager's throughput under N concurrent callers. //! //! The benchmark implementation ([`bench_impl`]) is parametrized by -//! - `redo_work` => [`Request::short_request`] or [`Request::medium_request`] +//! - `redo_work` => an async closure that takes a `PostgresRedoManager` and performs one redo //! - `n_redos` => number of times the benchmark shell execute the `redo_work` //! - `nclients` => number of clients (more on this shortly). //! @@ -10,7 +10,7 @@ //! Each task executes the `redo_work` `n_redos/nclients` times. //! //! We exercise the following combinations: -//! - `redo_work = short / medium`` +//! 
- `redo_work = ping / short / medium`` //! - `nclients = [1, 2, 4, 8, 16, 32, 64, 128]` //! //! We let `criterion` determine the `n_redos` using `iter_custom`. @@ -27,33 +27,43 @@ //! //! # Reference Numbers //! -//! 2024-04-15 on i3en.3xlarge +//! 2024-09-18 on im4gn.2xlarge //! //! ```text -//! short/1 time: [24.584 µs 24.737 µs 24.922 µs] -//! short/2 time: [33.479 µs 33.660 µs 33.888 µs] -//! short/4 time: [42.713 µs 43.046 µs 43.440 µs] -//! short/8 time: [71.814 µs 72.478 µs 73.240 µs] -//! short/16 time: [132.73 µs 134.45 µs 136.22 µs] -//! short/32 time: [258.31 µs 260.73 µs 263.27 µs] -//! short/64 time: [511.61 µs 514.44 µs 517.51 µs] -//! short/128 time: [992.64 µs 998.23 µs 1.0042 ms] -//! medium/1 time: [110.11 µs 110.50 µs 110.96 µs] -//! medium/2 time: [153.06 µs 153.85 µs 154.99 µs] -//! medium/4 time: [317.51 µs 319.92 µs 322.85 µs] -//! medium/8 time: [638.30 µs 644.68 µs 652.12 µs] -//! medium/16 time: [1.2651 ms 1.2773 ms 1.2914 ms] -//! medium/32 time: [2.5117 ms 2.5410 ms 2.5720 ms] -//! medium/64 time: [4.8088 ms 4.8555 ms 4.9047 ms] -//! medium/128 time: [8.8311 ms 8.9849 ms 9.1263 ms] +//! ping/1 time: [21.789 µs 21.918 µs 22.078 µs] +//! ping/2 time: [27.686 µs 27.812 µs 27.970 µs] +//! ping/4 time: [35.468 µs 35.671 µs 35.926 µs] +//! ping/8 time: [59.682 µs 59.987 µs 60.363 µs] +//! ping/16 time: [101.79 µs 102.37 µs 103.08 µs] +//! ping/32 time: [184.18 µs 185.15 µs 186.36 µs] +//! ping/64 time: [349.86 µs 351.45 µs 353.47 µs] +//! ping/128 time: [684.53 µs 687.98 µs 692.17 µs] +//! short/1 time: [31.833 µs 32.126 µs 32.428 µs] +//! short/2 time: [35.558 µs 35.756 µs 35.992 µs] +//! short/4 time: [44.850 µs 45.138 µs 45.484 µs] +//! short/8 time: [65.985 µs 66.379 µs 66.853 µs] +//! short/16 time: [127.06 µs 127.90 µs 128.87 µs] +//! short/32 time: [252.98 µs 254.70 µs 256.73 µs] +//! short/64 time: [497.13 µs 499.86 µs 503.26 µs] +//! short/128 time: [987.46 µs 993.45 µs 1.0004 ms] +//! 
medium/1 time: [137.91 µs 138.55 µs 139.35 µs] +//! medium/2 time: [192.00 µs 192.91 µs 194.07 µs] +//! medium/4 time: [389.62 µs 391.55 µs 394.01 µs] +//! medium/8 time: [776.80 µs 780.33 µs 784.77 µs] +//! medium/16 time: [1.5323 ms 1.5383 ms 1.5459 ms] +//! medium/32 time: [3.0120 ms 3.0226 ms 3.0350 ms] +//! medium/64 time: [5.7405 ms 5.7787 ms 5.8166 ms] +//! medium/128 time: [10.412 ms 10.574 ms 10.718 ms] //! ``` use anyhow::Context; use bytes::{Buf, Bytes}; use criterion::{BenchmarkId, Criterion}; +use once_cell::sync::Lazy; use pageserver::{config::PageServerConf, walrecord::NeonWalRecord, walredo::PostgresRedoManager}; use pageserver_api::{key::Key, shard::TenantShardId}; use std::{ + future::Future, sync::Arc, time::{Duration, Instant}, }; @@ -61,40 +71,59 @@ use tokio::{sync::Barrier, task::JoinSet}; use utils::{id::TenantId, lsn::Lsn}; fn bench(c: &mut Criterion) { - { - let nclients = [1, 2, 4, 8, 16, 32, 64, 128]; - for nclients in nclients { - let mut group = c.benchmark_group("short"); - group.bench_with_input( - BenchmarkId::from_parameter(nclients), - &nclients, - |b, nclients| { - let redo_work = Arc::new(Request::short_input()); - b.iter_custom(|iters| bench_impl(Arc::clone(&redo_work), iters, *nclients)); - }, - ); - } - } - { - let nclients = [1, 2, 4, 8, 16, 32, 64, 128]; - for nclients in nclients { - let mut group = c.benchmark_group("medium"); - group.bench_with_input( - BenchmarkId::from_parameter(nclients), - &nclients, - |b, nclients| { - let redo_work = Arc::new(Request::medium_input()); - b.iter_custom(|iters| bench_impl(Arc::clone(&redo_work), iters, *nclients)); - }, - ); - } + macro_rules! 
bench_group { + ($name:expr, $redo_work:expr) => {{ + let name: &str = $name; + let nclients = [1, 2, 4, 8, 16, 32, 64, 128]; + for nclients in nclients { + let mut group = c.benchmark_group(name); + group.bench_with_input( + BenchmarkId::from_parameter(nclients), + &nclients, + |b, nclients| { + b.iter_custom(|iters| bench_impl($redo_work, iters, *nclients)); + }, + ); + } + }}; } + // + // benchmark the protocol implementation + // + let pg_version = 14; + bench_group!( + "ping", + Arc::new(move |mgr: Arc| async move { + let _: () = mgr.ping(pg_version).await.unwrap(); + }) + ); + // + // benchmarks with actual record redo + // + let make_redo_work = |req: &'static Request| { + Arc::new(move |mgr: Arc| async move { + let page = req.execute(&mgr).await.unwrap(); + assert_eq!(page.remaining(), 8192); + }) + }; + bench_group!("short", { + static REQUEST: Lazy = Lazy::new(Request::short_input); + make_redo_work(&REQUEST) + }); + bench_group!("medium", { + static REQUEST: Lazy = Lazy::new(Request::medium_input); + make_redo_work(&REQUEST) + }); } criterion::criterion_group!(benches, bench); criterion::criterion_main!(benches); // Returns the sum of each client's wall-clock time spent executing their share of the n_redos. 
-fn bench_impl(redo_work: Arc, n_redos: u64, nclients: u64) -> Duration { +fn bench_impl(redo_work: Arc, n_redos: u64, nclients: u64) -> Duration +where + F: Fn(Arc) -> Fut + Send + Sync + 'static, + Fut: Future + Send + 'static, +{ let repo_dir = camino_tempfile::tempdir_in(env!("CARGO_TARGET_TMPDIR")).unwrap(); let conf = PageServerConf::dummy_conf(repo_dir.path().to_path_buf()); @@ -135,17 +164,20 @@ fn bench_impl(redo_work: Arc, n_redos: u64, nclients: u64) -> Duration }) } -async fn client( +async fn client( mgr: Arc, start: Arc, - redo_work: Arc, + redo_work: Arc, n_redos: u64, -) -> Duration { +) -> Duration +where + F: Fn(Arc) -> Fut + Send + Sync + 'static, + Fut: Future + Send + 'static, +{ start.wait().await; let start = Instant::now(); for _ in 0..n_redos { - let page = redo_work.execute(&mgr).await.unwrap(); - assert_eq!(page.remaining(), 8192); + redo_work(Arc::clone(&mgr)).await; // The real pageserver will rarely if ever do 2 walredos in a row without // yielding to the executor. tokio::task::yield_now().await; diff --git a/pageserver/client/src/mgmt_api.rs b/pageserver/client/src/mgmt_api.rs index a68f45a6d9..2d95ac42e6 100644 --- a/pageserver/client/src/mgmt_api.rs +++ b/pageserver/client/src/mgmt_api.rs @@ -432,7 +432,7 @@ impl Client { self.mgmt_api_endpoint ); - self.request(Method::POST, &uri, req) + self.request(Method::PUT, &uri, req) .await? 
.json() .await diff --git a/pageserver/compaction/Cargo.toml b/pageserver/compaction/Cargo.toml index 52b58fc298..d4f89ac38a 100644 --- a/pageserver/compaction/Cargo.toml +++ b/pageserver/compaction/Cargo.toml @@ -12,7 +12,6 @@ anyhow.workspace = true async-stream.workspace = true clap = { workspace = true, features = ["string"] } futures.workspace = true -git-version.workspace = true itertools.workspace = true once_cell.workspace = true pageserver_api.workspace = true diff --git a/pageserver/ctl/Cargo.toml b/pageserver/ctl/Cargo.toml index 9592002de1..a753f806a0 100644 --- a/pageserver/ctl/Cargo.toml +++ b/pageserver/ctl/Cargo.toml @@ -10,7 +10,6 @@ license.workspace = true anyhow.workspace = true camino.workspace = true clap = { workspace = true, features = ["string"] } -git-version.workspace = true humantime.workspace = true pageserver = { path = ".." } pageserver_api.workspace = true diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index e9f197ec2d..e15f1c791b 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -13,7 +13,6 @@ use pageserver_api::{ use remote_storage::{RemotePath, RemoteStorageConfig}; use std::env; use storage_broker::Uri; -use utils::crashsafe::path_with_suffix_extension; use utils::logging::SecretString; use once_cell::sync::OnceCell; @@ -33,7 +32,7 @@ use crate::tenant::storage_layer::inmemory_layer::IndexEntry; use crate::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME}; use crate::virtual_file; use crate::virtual_file::io_engine; -use crate::{TENANT_HEATMAP_BASENAME, TENANT_LOCATION_CONFIG_NAME, TIMELINE_DELETE_MARK_SUFFIX}; +use crate::{TENANT_HEATMAP_BASENAME, TENANT_LOCATION_CONFIG_NAME}; /// Global state of pageserver. 
/// @@ -257,17 +256,6 @@ impl PageServerConf { .join(timeline_id.to_string()) } - pub(crate) fn timeline_delete_mark_file_path( - &self, - tenant_shard_id: TenantShardId, - timeline_id: TimelineId, - ) -> Utf8PathBuf { - path_with_suffix_extension( - self.timeline_path(&tenant_shard_id, &timeline_id), - TIMELINE_DELETE_MARK_SUFFIX, - ) - } - /// Turns storage remote path of a file into its local path. pub fn local_path(&self, remote_path: &RemotePath) -> Utf8PathBuf { remote_path.with_base(&self.workdir) @@ -336,7 +324,6 @@ impl PageServerConf { max_vectored_read_bytes, image_compression, ephemeral_bytes_per_memory_kb, - compact_level0_phase1_value_access: _, l0_flush, virtual_file_direct_io, concurrent_tenant_warmup, @@ -491,11 +478,6 @@ pub struct ConfigurableSemaphore { } impl ConfigurableSemaphore { - pub const DEFAULT_INITIAL: NonZeroUsize = match NonZeroUsize::new(1) { - Some(x) => x, - None => panic!("const unwrap is not yet stable"), - }; - /// Initializse using a non-zero amount of permits. /// /// Require a non-zero initial permits, because using permits == 0 is a crude way to disable a @@ -516,12 +498,6 @@ impl ConfigurableSemaphore { } } -impl Default for ConfigurableSemaphore { - fn default() -> Self { - Self::new(Self::DEFAULT_INITIAL) - } -} - impl PartialEq for ConfigurableSemaphore { fn eq(&self, other: &Self) -> bool { // the number of permits can be increased at runtime, so we cannot really fulfill the @@ -558,16 +534,6 @@ mod tests { .expect("parse_and_validate"); } - #[test] - fn test_compactl0_phase1_access_mode_is_ignored_silently() { - let input = indoc::indoc! {r#" - [compact_level0_phase1_value_access] - mode = "streaming-kmerge" - validate = "key-lsn-value" - "#}; - toml_edit::de::from_str::(input).unwrap(); - } - /// If there's a typo in the pageserver config, we'd rather catch that typo /// and fail pageserver startup than silently ignoring the typo, leaving whoever /// made it in the believe that their config change is effective. 
diff --git a/pageserver/src/control_plane_client.rs b/pageserver/src/control_plane_client.rs index f6d1c35a8c..d0a967b920 100644 --- a/pageserver/src/control_plane_client.rs +++ b/pageserver/src/control_plane_client.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use futures::Future; use pageserver_api::{ - controller_api::NodeRegisterRequest, + controller_api::{AvailabilityZone, NodeRegisterRequest}, shard::TenantShardId, upcall_api::{ ReAttachRequest, ReAttachResponse, ReAttachResponseTenant, ValidateRequest, @@ -148,10 +148,10 @@ impl ControlPlaneGenerationsApi for ControlPlaneClient { .and_then(|jv| jv.as_str().map(|str| str.to_owned())); match az_id_from_metadata { - Some(az_id) => Some(az_id), + Some(az_id) => Some(AvailabilityZone(az_id)), None => { tracing::warn!("metadata.json does not contain an 'availability_zone_id' field"); - conf.availability_zone.clone() + conf.availability_zone.clone().map(AvailabilityZone) } } }; diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index d645f3b7b6..ba38120bf1 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -589,6 +589,10 @@ async fn timeline_create_handler( StatusCode::SERVICE_UNAVAILABLE, HttpErrorBody::from_msg(e.to_string()), ), + Err(e @ tenant::CreateTimelineError::AncestorArchived) => json_response( + StatusCode::NOT_ACCEPTABLE, + HttpErrorBody::from_msg(e.to_string()), + ), Err(tenant::CreateTimelineError::ShuttingDown) => json_response( StatusCode::SERVICE_UNAVAILABLE, HttpErrorBody::from_msg("tenant shutting down".to_string()), @@ -2955,7 +2959,7 @@ pub fn make_router( "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/preserve_initdb_archive", |r| api_handler(r, timeline_preserve_initdb_handler), ) - .post( + .put( "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/archival_config", |r| api_handler(r, timeline_archival_config_handler), ) diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 72229d80be..366bd82903 100644 --- 
a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -1177,10 +1177,10 @@ pub(crate) mod virtual_file_io_engine { } struct GlobalAndPerTimelineHistogramTimer<'a, 'c> { - global_metric: &'a Histogram, + global_latency_histo: &'a Histogram, // Optional because not all op types are tracked per-timeline - timeline_metric: Option<&'a Histogram>, + per_timeline_latency_histo: Option<&'a Histogram>, ctx: &'c RequestContext, start: std::time::Instant, @@ -1212,9 +1212,10 @@ impl<'a, 'c> Drop for GlobalAndPerTimelineHistogramTimer<'a, 'c> { elapsed } }; - self.global_metric.observe(ex_throttled.as_secs_f64()); - if let Some(timeline_metric) = self.timeline_metric { - timeline_metric.observe(ex_throttled.as_secs_f64()); + self.global_latency_histo + .observe(ex_throttled.as_secs_f64()); + if let Some(per_timeline_getpage_histo) = self.per_timeline_latency_histo { + per_timeline_getpage_histo.observe(ex_throttled.as_secs_f64()); } } } @@ -1240,10 +1241,32 @@ pub enum SmgrQueryType { #[derive(Debug)] pub(crate) struct SmgrQueryTimePerTimeline { - global_metrics: [Histogram; SmgrQueryType::COUNT], - per_timeline_getpage: Histogram, + global_started: [IntCounter; SmgrQueryType::COUNT], + global_latency: [Histogram; SmgrQueryType::COUNT], + per_timeline_getpage_started: IntCounter, + per_timeline_getpage_latency: Histogram, } +static SMGR_QUERY_STARTED_GLOBAL: Lazy = Lazy::new(|| { + register_int_counter_vec!( + // it's a counter, but, name is prepared to extend it to a histogram of queue depth + "pageserver_smgr_query_started_global_count", + "Number of smgr queries started, aggregated by query type.", + &["smgr_query_type"], + ) + .expect("failed to define a metric") +}); + +static SMGR_QUERY_STARTED_PER_TENANT_TIMELINE: Lazy = Lazy::new(|| { + register_int_counter_vec!( + // it's a counter, but, name is prepared to extend it to a histogram of queue depth + "pageserver_smgr_query_started_count", + "Number of smgr queries started, aggregated by query type and 
tenant/timeline.", + &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"], + ) + .expect("failed to define a metric") +}); + static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy = Lazy::new(|| { register_histogram_vec!( "pageserver_smgr_query_seconds", @@ -1319,14 +1342,20 @@ impl SmgrQueryTimePerTimeline { let tenant_id = tenant_shard_id.tenant_id.to_string(); let shard_slug = format!("{}", tenant_shard_id.shard_slug()); let timeline_id = timeline_id.to_string(); - let global_metrics = std::array::from_fn(|i| { + let global_started = std::array::from_fn(|i| { + let op = SmgrQueryType::from_repr(i).unwrap(); + SMGR_QUERY_STARTED_GLOBAL + .get_metric_with_label_values(&[op.into()]) + .unwrap() + }); + let global_latency = std::array::from_fn(|i| { let op = SmgrQueryType::from_repr(i).unwrap(); SMGR_QUERY_TIME_GLOBAL .get_metric_with_label_values(&[op.into()]) .unwrap() }); - let per_timeline_getpage = SMGR_QUERY_TIME_PER_TENANT_TIMELINE + let per_timeline_getpage_started = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE .get_metric_with_label_values(&[ SmgrQueryType::GetPageAtLsn.into(), &tenant_id, @@ -1334,18 +1363,32 @@ impl SmgrQueryTimePerTimeline { &timeline_id, ]) .unwrap(); + let per_timeline_getpage_latency = SMGR_QUERY_TIME_PER_TENANT_TIMELINE + .get_metric_with_label_values(&[ + SmgrQueryType::GetPageAtLsn.into(), + &tenant_id, + &shard_slug, + &timeline_id, + ]) + .unwrap(); + Self { - global_metrics, - per_timeline_getpage, + global_started, + global_latency, + per_timeline_getpage_latency, + per_timeline_getpage_started, } } pub(crate) fn start_timer<'c: 'a, 'a>( &'a self, op: SmgrQueryType, ctx: &'c RequestContext, - ) -> Option { - let global_metric = &self.global_metrics[op as usize]; + ) -> Option { let start = Instant::now(); + + self.global_started[op as usize].inc(); + + // We subtract time spent throttled from the observed latency. 
match ctx.micros_spent_throttled.open() { Ok(()) => (), Err(error) => { @@ -1364,15 +1407,16 @@ impl SmgrQueryTimePerTimeline { } } - let timeline_metric = if matches!(op, SmgrQueryType::GetPageAtLsn) { - Some(&self.per_timeline_getpage) + let per_timeline_latency_histo = if matches!(op, SmgrQueryType::GetPageAtLsn) { + self.per_timeline_getpage_started.inc(); + Some(&self.per_timeline_getpage_latency) } else { None }; Some(GlobalAndPerTimelineHistogramTimer { - global_metric, - timeline_metric, + global_latency_histo: &self.global_latency[op as usize], + per_timeline_latency_histo, ctx, start, op, @@ -1423,9 +1467,12 @@ mod smgr_query_time_tests { let get_counts = || { let global: u64 = ops .iter() - .map(|op| metrics.global_metrics[*op as usize].get_sample_count()) + .map(|op| metrics.global_latency[*op as usize].get_sample_count()) .sum(); - (global, metrics.per_timeline_getpage.get_sample_count()) + ( + global, + metrics.per_timeline_getpage_latency.get_sample_count(), + ) }; let (pre_global, pre_per_tenant_timeline) = get_counts(); @@ -1487,7 +1534,7 @@ impl BasebackupQueryTime { pub(crate) fn start_recording<'c: 'a, 'a>( &'a self, ctx: &'c RequestContext, - ) -> BasebackupQueryTimeOngoingRecording<'_, '_> { + ) -> BasebackupQueryTimeOngoingRecording<'a, 'a> { let start = Instant::now(); match ctx.micros_spent_throttled.open() { Ok(()) => (), @@ -2576,6 +2623,12 @@ impl TimelineMetrics { let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]); } + let _ = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE.remove_label_values(&[ + SmgrQueryType::GetPageAtLsn.into(), + tenant_id, + shard_id, + timeline_id, + ]); let _ = SMGR_QUERY_TIME_PER_TENANT_TIMELINE.remove_label_values(&[ SmgrQueryType::GetPageAtLsn.into(), tenant_id, @@ -2592,6 +2645,8 @@ pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) { let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]); } + tenant_throttling::remove_tenant_metrics(tenant_shard_id); + 
// we leave the BROKEN_TENANTS_SET entry if any } @@ -3055,41 +3110,173 @@ pub mod tokio_epoll_uring { pub(crate) mod tenant_throttling { use metrics::{register_int_counter_vec, IntCounter}; use once_cell::sync::Lazy; + use utils::shard::TenantShardId; use crate::tenant::{self, throttle::Metric}; - pub(crate) struct TimelineGet { - wait_time: IntCounter, - count: IntCounter, + struct GlobalAndPerTenantIntCounter { + global: IntCounter, + per_tenant: IntCounter, } - pub(crate) static TIMELINE_GET: Lazy = Lazy::new(|| { - static WAIT_USECS: Lazy = Lazy::new(|| { - register_int_counter_vec!( - "pageserver_tenant_throttling_wait_usecs_sum_global", - "Sum of microseconds that tenants spent waiting for a tenant throttle of a given kind.", + impl GlobalAndPerTenantIntCounter { + #[inline(always)] + pub(crate) fn inc(&self) { + self.inc_by(1) + } + #[inline(always)] + pub(crate) fn inc_by(&self, n: u64) { + self.global.inc_by(n); + self.per_tenant.inc_by(n); + } + } + + pub(crate) struct TimelineGet { + count_accounted_start: GlobalAndPerTenantIntCounter, + count_accounted_finish: GlobalAndPerTenantIntCounter, + wait_time: GlobalAndPerTenantIntCounter, + count_throttled: GlobalAndPerTenantIntCounter, + } + + static COUNT_ACCOUNTED_START: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "pageserver_tenant_throttling_count_accounted_start_global", + "Count of tenant throttling starts, by kind of throttle.", &["kind"] ) - .unwrap() - }); - - static WAIT_COUNT: Lazy = Lazy::new(|| { - register_int_counter_vec!( - "pageserver_tenant_throttling_count_global", - "Count of tenant throttlings, by kind of throttle.", - &["kind"] - ) - .unwrap() - }); - - let kind = "timeline_get"; - TimelineGet { - wait_time: WAIT_USECS.with_label_values(&[kind]), - count: WAIT_COUNT.with_label_values(&[kind]), - } + .unwrap() + }); + static COUNT_ACCOUNTED_START_PER_TENANT: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "pageserver_tenant_throttling_count_accounted_start", + "Count of 
tenant throttling starts, by kind of throttle.", + &["kind", "tenant_id", "shard_id"] + ) + .unwrap() + }); + static COUNT_ACCOUNTED_FINISH: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "pageserver_tenant_throttling_count_accounted_finish_global", + "Count of tenant throttling finishes, by kind of throttle.", + &["kind"] + ) + .unwrap() + }); + static COUNT_ACCOUNTED_FINISH_PER_TENANT: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "pageserver_tenant_throttling_count_accounted_finish", + "Count of tenant throttling finishes, by kind of throttle.", + &["kind", "tenant_id", "shard_id"] + ) + .unwrap() + }); + static WAIT_USECS: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "pageserver_tenant_throttling_wait_usecs_sum_global", + "Sum of microseconds that spent waiting throttle by kind of throttle.", + &["kind"] + ) + .unwrap() + }); + static WAIT_USECS_PER_TENANT: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "pageserver_tenant_throttling_wait_usecs_sum", + "Sum of microseconds that spent waiting throttle by kind of throttle.", + &["kind", "tenant_id", "shard_id"] + ) + .unwrap() }); - impl Metric for &'static TimelineGet { + static WAIT_COUNT: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "pageserver_tenant_throttling_count_global", + "Count of tenant throttlings, by kind of throttle.", + &["kind"] + ) + .unwrap() + }); + static WAIT_COUNT_PER_TENANT: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "pageserver_tenant_throttling_count", + "Count of tenant throttlings, by kind of throttle.", + &["kind", "tenant_id", "shard_id"] + ) + .unwrap() + }); + + const KIND: &str = "timeline_get"; + + impl TimelineGet { + pub(crate) fn new(tenant_shard_id: &TenantShardId) -> Self { + let per_tenant_label_values = &[ + KIND, + &tenant_shard_id.tenant_id.to_string(), + &tenant_shard_id.shard_slug().to_string(), + ]; + TimelineGet { + count_accounted_start: { + GlobalAndPerTenantIntCounter { + global: 
COUNT_ACCOUNTED_START.with_label_values(&[KIND]), + per_tenant: COUNT_ACCOUNTED_START_PER_TENANT + .with_label_values(per_tenant_label_values), + } + }, + count_accounted_finish: { + GlobalAndPerTenantIntCounter { + global: COUNT_ACCOUNTED_FINISH.with_label_values(&[KIND]), + per_tenant: COUNT_ACCOUNTED_FINISH_PER_TENANT + .with_label_values(per_tenant_label_values), + } + }, + wait_time: { + GlobalAndPerTenantIntCounter { + global: WAIT_USECS.with_label_values(&[KIND]), + per_tenant: WAIT_USECS_PER_TENANT + .with_label_values(per_tenant_label_values), + } + }, + count_throttled: { + GlobalAndPerTenantIntCounter { + global: WAIT_COUNT.with_label_values(&[KIND]), + per_tenant: WAIT_COUNT_PER_TENANT + .with_label_values(per_tenant_label_values), + } + }, + } + } + } + + pub(crate) fn preinitialize_global_metrics() { + Lazy::force(&COUNT_ACCOUNTED_START); + Lazy::force(&COUNT_ACCOUNTED_FINISH); + Lazy::force(&WAIT_USECS); + Lazy::force(&WAIT_COUNT); + } + + pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) { + for m in &[ + &COUNT_ACCOUNTED_START_PER_TENANT, + &COUNT_ACCOUNTED_FINISH_PER_TENANT, + &WAIT_USECS_PER_TENANT, + &WAIT_COUNT_PER_TENANT, + ] { + let _ = m.remove_label_values(&[ + KIND, + &tenant_shard_id.tenant_id.to_string(), + &tenant_shard_id.shard_slug().to_string(), + ]); + } + } + + impl Metric for TimelineGet { + #[inline(always)] + fn accounting_start(&self) { + self.count_accounted_start.inc(); + } + #[inline(always)] + fn accounting_finish(&self) { + self.count_accounted_finish.inc(); + } #[inline(always)] fn observe_throttling( &self, @@ -3097,7 +3284,7 @@ pub(crate) mod tenant_throttling { ) { let val = u64::try_from(wait_time.as_micros()).unwrap(); self.wait_time.inc_by(val); - self.count.inc(); + self.count_throttled.inc(); } } } @@ -3227,11 +3414,14 @@ pub fn preinitialize_metrics() { } // countervecs - [&BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT] - .into_iter() - .for_each(|c| { - Lazy::force(c); - }); + [ + 
&BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT, + &SMGR_QUERY_STARTED_GLOBAL, + ] + .into_iter() + .for_each(|c| { + Lazy::force(c); + }); // gauges WALRECEIVER_ACTIVE_MANAGERS.get(); @@ -3253,7 +3443,8 @@ pub fn preinitialize_metrics() { // Custom Lazy::force(&RECONSTRUCT_TIME); - Lazy::force(&tenant_throttling::TIMELINE_GET); Lazy::force(&BASEBACKUP_QUERY_TIME); Lazy::force(&COMPUTE_COMMANDS_COUNTERS); + + tenant_throttling::preinitialize_global_metrics(); } diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 5f8766ca2c..7aa313f031 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -840,6 +840,36 @@ impl Timeline { Ok(total_size * BLCKSZ as u64) } + /// Get a KeySpace that covers all the Keys that are in use at AND below the given LSN. This is only used + /// for gc-compaction. + /// + /// gc-compaction cannot use the same `collect_keyspace` function as the legacy compaction because it + /// processes data at multiple LSNs and needs to be aware of the fact that some key ranges might need to + /// be kept only for a specific range of LSN. + /// + /// Consider the case that the user created branches at LSN 10 and 20, where the user created a table A at + /// LSN 10 and dropped that table at LSN 20. `collect_keyspace` at LSN 10 will return the key range + /// corresponding to that table, while LSN 20 won't. The keyspace info at a single LSN is not enough to + /// determine which keys to retain/drop for gc-compaction. + /// + /// For now, it only drops AUX-v1 keys. But in the future, the function will be extended to return the keyspace + /// to be retained for each of the branch LSN. + /// + /// The return value is (dense keyspace, sparse keyspace). 
+ pub(crate) async fn collect_gc_compaction_keyspace( + &self, + ) -> Result<(KeySpace, SparseKeySpace), CollectKeySpaceError> { + let metadata_key_begin = Key::metadata_key_range().start; + let aux_v1_key = AUX_FILES_KEY; + let dense_keyspace = KeySpace { + ranges: vec![Key::MIN..aux_v1_key, aux_v1_key.next()..metadata_key_begin], + }; + Ok(( + dense_keyspace, + SparseKeySpace(KeySpace::single(Key::metadata_key_range())), + )) + } + /// /// Get a KeySpace that covers all the Keys that are in use at the given LSN. /// Anything that's not listed maybe removed from the underlying storage (from diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index c6f0e48101..2aebf4f999 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -18,7 +18,6 @@ use camino::Utf8Path; use camino::Utf8PathBuf; use enumset::EnumSet; use futures::stream::FuturesUnordered; -use futures::FutureExt; use futures::StreamExt; use pageserver_api::models; use pageserver_api::models::AuxFilePolicy; @@ -34,6 +33,7 @@ use remote_storage::GenericRemoteStorage; use remote_storage::TimeoutOrCancel; use std::collections::BTreeMap; use std::fmt; +use std::future::Future; use std::sync::Weak; use std::time::SystemTime; use storage_broker::BrokerClientChannel; @@ -140,6 +140,7 @@ pub mod metadata; pub mod remote_timeline_client; pub mod storage_layer; +pub mod checks; pub mod config; pub mod mgr; pub mod secondary; @@ -301,7 +302,7 @@ pub struct Tenant { /// Throttle applied at the top of [`Timeline::get`]. /// All [`Tenant::timelines`] of a given [`Tenant`] instance share the same [`throttle::Throttle`] instance. pub(crate) timeline_get_throttle: - Arc>, + Arc>, /// An ongoing timeline detach concurrency limiter. 
/// @@ -562,6 +563,8 @@ pub enum CreateTimelineError { AncestorLsn(anyhow::Error), #[error("ancestor timeline is not active")] AncestorNotActive, + #[error("ancestor timeline is archived")] + AncestorArchived, #[error("tenant shutting down")] ShuttingDown, #[error(transparent)] @@ -1030,13 +1033,9 @@ impl Tenant { } Ok(TenantPreload { - timelines: Self::load_timeline_metadata( - self, - remote_timeline_ids, - remote_storage, - cancel, - ) - .await?, + timelines: self + .load_timelines_metadata(remote_timeline_ids, remote_storage, cancel) + .await?, }) } @@ -1302,7 +1301,7 @@ impl Tenant { .await } - async fn load_timeline_metadata( + async fn load_timelines_metadata( self: &Arc, timeline_ids: HashSet, remote_storage: &GenericRemoteStorage, @@ -1310,33 +1309,10 @@ impl Tenant { ) -> anyhow::Result> { let mut part_downloads = JoinSet::new(); for timeline_id in timeline_ids { - let client = RemoteTimelineClient::new( - remote_storage.clone(), - self.deletion_queue_client.clone(), - self.conf, - self.tenant_shard_id, - timeline_id, - self.generation, - ); let cancel_clone = cancel.clone(); part_downloads.spawn( - async move { - debug!("starting index part download"); - - let index_part = client.download_index_file(&cancel_clone).await; - - debug!("finished index part download"); - - Result::<_, anyhow::Error>::Ok(TimelinePreload { - client, - timeline_id, - index_part, - }) - } - .map(move |res| { - res.with_context(|| format!("download index part for timeline {timeline_id}")) - }) - .instrument(info_span!("download_index_part", %timeline_id)), + self.load_timeline_metadata(timeline_id, remote_storage.clone(), cancel_clone) + .instrument(info_span!("download_index_part", %timeline_id)), ); } @@ -1347,8 +1323,7 @@ impl Tenant { next = part_downloads.join_next() => { match next { Some(result) => { - let preload_result = result.context("join preload task")?; - let preload = preload_result?; + let preload = result.context("join preload task")?; 
timeline_preloads.insert(preload.timeline_id, preload); }, None => { @@ -1365,6 +1340,36 @@ impl Tenant { Ok(timeline_preloads) } + fn load_timeline_metadata( + self: &Arc, + timeline_id: TimelineId, + remote_storage: GenericRemoteStorage, + cancel: CancellationToken, + ) -> impl Future { + let client = RemoteTimelineClient::new( + remote_storage.clone(), + self.deletion_queue_client.clone(), + self.conf, + self.tenant_shard_id, + timeline_id, + self.generation, + ); + async move { + debug_assert_current_span_has_tenant_and_timeline_id(); + debug!("starting index part download"); + + let index_part = client.download_index_file(&cancel).await; + + debug!("finished index part download"); + + TimelinePreload { + client, + timeline_id, + index_part, + } + } + } + pub(crate) async fn apply_timeline_archival_config( &self, timeline_id: TimelineId, @@ -1573,6 +1578,9 @@ impl Tenant { image_layer_desc: Vec<(Lsn, Vec<(pageserver_api::key::Key, bytes::Bytes)>)>, end_lsn: Lsn, ) -> anyhow::Result> { + use checks::check_valid_layermap; + use itertools::Itertools; + let tline = self .create_test_timeline(new_timeline_id, initdb_lsn, pg_version, ctx) .await?; @@ -1587,6 +1595,18 @@ impl Tenant { .force_create_image_layer(lsn, images, Some(initdb_lsn), ctx) .await?; } + let layer_names = tline + .layers + .read() + .await + .layer_map() + .unwrap() + .iter_historic_layers() + .map(|layer| layer.layer_name()) + .collect_vec(); + if let Some(err) = check_valid_layermap(&layer_names) { + bail!("invalid layermap: {err}"); + } Ok(tline) } @@ -1680,6 +1700,11 @@ impl Tenant { return Err(CreateTimelineError::AncestorNotActive); } + if ancestor_timeline.is_archived() == Some(true) { + info!("tried to branch archived timeline"); + return Err(CreateTimelineError::AncestorArchived); + } + if let Some(lsn) = ancestor_start_lsn.as_mut() { *lsn = lsn.align(); @@ -1950,9 +1975,6 @@ impl Tenant { TenantState::Activating(_) | TenantState::Active | TenantState::Broken { .. 
} | TenantState::Stopping { .. } => { panic!("caller is responsible for calling activate() only on Loading / Attaching tenants, got {state:?}", state = current_state); } - TenantState::Loading => { - *current_state = TenantState::Activating(ActivatingFrom::Loading); - } TenantState::Attaching => { *current_state = TenantState::Activating(ActivatingFrom::Attaching); } @@ -2133,7 +2155,7 @@ impl Tenant { async fn set_stopping( &self, progress: completion::Barrier, - allow_transition_from_loading: bool, + _allow_transition_from_loading: bool, allow_transition_from_attaching: bool, ) -> Result<(), SetStoppingError> { let mut rx = self.state.subscribe(); @@ -2148,7 +2170,6 @@ impl Tenant { ); false } - TenantState::Loading => allow_transition_from_loading, TenantState::Active | TenantState::Broken { .. } | TenantState::Stopping { .. } => true, }) .await @@ -2167,13 +2188,6 @@ impl Tenant { *current_state = TenantState::Stopping { progress }; true } - TenantState::Loading => { - if !allow_transition_from_loading { - unreachable!("3we ensured above that we're done with activation, and, there is no re-activation") - }; - *current_state = TenantState::Stopping { progress }; - true - } TenantState::Active => { // FIXME: due to time-of-check vs time-of-use issues, it can happen that new timelines // are created after the transition to Stopping. That's harmless, as the Timelines @@ -2229,7 +2243,7 @@ impl Tenant { // The load & attach routines own the tenant state until it has reached `Active`. // So, wait until it's done. 
rx.wait_for(|state| match state { - TenantState::Activating(_) | TenantState::Loading | TenantState::Attaching => { + TenantState::Activating(_) | TenantState::Attaching => { info!( "waiting for {} to turn Active|Broken|Stopping", <&'static str>::from(state) @@ -2249,7 +2263,7 @@ impl Tenant { let reason = reason.to_string(); self.state.send_modify(|current_state| { match *current_state { - TenantState::Activating(_) | TenantState::Loading | TenantState::Attaching => { + TenantState::Activating(_) | TenantState::Attaching => { unreachable!("we ensured above that we're done with activation, and, there is no re-activation") } TenantState::Active => { @@ -2293,7 +2307,7 @@ impl Tenant { loop { let current_state = receiver.borrow_and_update().clone(); match current_state { - TenantState::Loading | TenantState::Attaching | TenantState::Activating(_) => { + TenantState::Attaching | TenantState::Activating(_) => { // in these states, there's a chance that we can reach ::Active self.activate_now(); match timeout_cancellable(timeout, &self.cancel, receiver.changed()).await { @@ -2815,7 +2829,7 @@ impl Tenant { gate: Gate::default(), timeline_get_throttle: Arc::new(throttle::Throttle::new( Tenant::get_timeline_get_throttle_config(conf, &attached_conf.tenant_conf), - &crate::metrics::tenant_throttling::TIMELINE_GET, + crate::metrics::tenant_throttling::TimelineGet::new(&tenant_shard_id), )), tenant_conf: Arc::new(ArcSwap::from_pointee(attached_conf)), ongoing_timeline_detach: std::sync::Mutex::default(), @@ -3197,6 +3211,9 @@ impl Tenant { image_layer_desc: Vec<(Lsn, Vec<(pageserver_api::key::Key, bytes::Bytes)>)>, end_lsn: Lsn, ) -> anyhow::Result> { + use checks::check_valid_layermap; + use itertools::Itertools; + let tline = self .branch_timeline_test(src_timeline, dst_id, ancestor_lsn, ctx) .await?; @@ -3217,6 +3234,18 @@ impl Tenant { .force_create_image_layer(lsn, images, Some(ancestor_lsn), ctx) .await?; } + let layer_names = tline + .layers + .read() + .await + 
.layer_map() + .unwrap() + .iter_historic_layers() + .map(|layer| layer.layer_name()) + .collect_vec(); + if let Some(err) = check_valid_layermap(&layer_names) { + bail!("invalid layermap: {err}"); + } Ok(tline) } @@ -3594,7 +3623,7 @@ impl Tenant { start_lsn: Lsn, ancestor: Option>, last_aux_file_policy: Option, - ) -> anyhow::Result { + ) -> anyhow::Result> { let tenant_shard_id = self.tenant_shard_id; let resources = self.build_timeline_resources(new_timeline_id); @@ -4111,7 +4140,7 @@ pub(crate) mod harness { let walredo_mgr = Arc::new(WalRedoManager::from(TestRedoManager)); let tenant = Arc::new(Tenant::new( - TenantState::Loading, + TenantState::Attaching, self.conf, AttachedTenantConf::try_from(LocationConf::attached_single( TenantConfOpt::from(self.tenant_conf.clone()), @@ -4164,9 +4193,18 @@ pub(crate) mod harness { let records_neon = records.iter().all(|r| apply_neon::can_apply_in_neon(&r.1)); if records_neon { // For Neon wal records, we can decode without spawning postgres, so do so. - let base_img = base_img.expect("Neon WAL redo requires base image").1; - let mut page = BytesMut::new(); - page.extend_from_slice(&base_img); + let mut page = match (base_img, records.first()) { + (Some((_lsn, img)), _) => { + let mut page = BytesMut::new(); + page.extend_from_slice(&img); + page + } + (_, Some((_lsn, rec))) if rec.will_init() => BytesMut::new(), + _ => { + panic!("Neon WAL redo requires base image or will init record"); + } + }; + for (record_lsn, record) in records { apply_neon::apply_in_neon(&record, record_lsn, key, &mut page)?; } @@ -8470,4 +8508,135 @@ mod tests { Ok(()) } + + // Regression test for https://github.com/neondatabase/neon/issues/9012 + // Create an image arrangement where we have to read at different LSN ranges + // from a delta layer. This is achieved by overlapping an image layer on top of + // a delta layer. 
Like so: + // + // A B + // +----------------+ -> delta_layer + // | | ^ lsn + // | =========|-> nested_image_layer | + // | C | | + // +----------------+ | + // ======== -> baseline_image_layer +-------> key + // + // + // When querying the key range [A, B) we need to read at different LSN ranges + // for [A, C) and [C, B). This test checks that the described edge case is handled correctly. + #[tokio::test] + async fn test_vectored_read_with_nested_image_layer() -> anyhow::Result<()> { + let harness = TenantHarness::create("test_vectored_read_with_nested_image_layer").await?; + let (tenant, ctx) = harness.load().await; + + let will_init_keys = [2, 6]; + fn get_key(id: u32) -> Key { + let mut key = Key::from_hex("110000000033333333444444445500000000").unwrap(); + key.field6 = id; + key + } + + let mut expected_key_values = HashMap::new(); + + let baseline_image_layer_lsn = Lsn(0x10); + let mut baseline_img_layer = Vec::new(); + for i in 0..5 { + let key = get_key(i); + let value = format!("value {i}@{baseline_image_layer_lsn}"); + + let removed = expected_key_values.insert(key, value.clone()); + assert!(removed.is_none()); + + baseline_img_layer.push((key, Bytes::from(value))); + } + + let nested_image_layer_lsn = Lsn(0x50); + let mut nested_img_layer = Vec::new(); + for i in 5..10 { + let key = get_key(i); + let value = format!("value {i}@{nested_image_layer_lsn}"); + + let removed = expected_key_values.insert(key, value.clone()); + assert!(removed.is_none()); + + nested_img_layer.push((key, Bytes::from(value))); + } + + let mut delta_layer_spec = Vec::default(); + let delta_layer_start_lsn = Lsn(0x20); + let mut delta_layer_end_lsn = delta_layer_start_lsn; + + for i in 0..10 { + let key = get_key(i); + let key_in_nested = nested_img_layer + .iter() + .any(|(key_with_img, _)| *key_with_img == key); + let lsn = { + if key_in_nested { + Lsn(nested_image_layer_lsn.0 + 0x10) + } else { + delta_layer_start_lsn + } + }; + + let will_init = will_init_keys.contains(&i); + 
if will_init { + delta_layer_spec.push((key, lsn, Value::WalRecord(NeonWalRecord::wal_init()))); + + expected_key_values.insert(key, "".to_string()); + } else { + let delta = format!("@{lsn}"); + delta_layer_spec.push(( + key, + lsn, + Value::WalRecord(NeonWalRecord::wal_append(&delta)), + )); + + expected_key_values + .get_mut(&key) + .expect("An image exists for each key") + .push_str(delta.as_str()); + } + delta_layer_end_lsn = std::cmp::max(delta_layer_start_lsn, lsn); + } + + delta_layer_end_lsn = Lsn(delta_layer_end_lsn.0 + 1); + + assert!( + nested_image_layer_lsn > delta_layer_start_lsn + && nested_image_layer_lsn < delta_layer_end_lsn + ); + + let tline = tenant + .create_test_timeline_with_layers( + TIMELINE_ID, + baseline_image_layer_lsn, + DEFAULT_PG_VERSION, + &ctx, + vec![DeltaLayerTestDesc::new_with_inferred_key_range( + delta_layer_start_lsn..delta_layer_end_lsn, + delta_layer_spec, + )], // delta layers + vec![ + (baseline_image_layer_lsn, baseline_img_layer), + (nested_image_layer_lsn, nested_img_layer), + ], // image layers + delta_layer_end_lsn, + ) + .await?; + + let keyspace = KeySpace::single(get_key(0)..get_key(10)); + let results = tline + .get_vectored(keyspace, delta_layer_end_lsn, &ctx) + .await + .expect("No vectored errors"); + for (key, res) in results { + let value = res.expect("No key errors"); + let expected_value = expected_key_values.remove(&key).expect("No unknown keys"); + assert_eq!(value, Bytes::from(expected_value)); + } + + Ok(()) + } } diff --git a/pageserver/src/tenant/checks.rs b/pageserver/src/tenant/checks.rs new file mode 100644 index 0000000000..1e8fa8d1d6 --- /dev/null +++ b/pageserver/src/tenant/checks.rs @@ -0,0 +1,56 @@ +use std::collections::BTreeSet; + +use itertools::Itertools; + +use super::storage_layer::LayerName; + +/// Checks whether a layer map is valid (i.e., is a valid result of the current compaction algorithm if nothing goes wrong). 
+/// +/// The function checks if we can split the LSN range of a delta layer only at the LSNs of the delta layers. For example, +/// +/// ```plain +/// | | | | +/// | 1 | | 2 | | 3 | +/// | | | | | | +/// ``` +/// +/// This is not a valid layer map because the LSN range of layer 1 intersects with the LSN range of layer 2. 1 and 2 should have +/// the same LSN range. +/// +/// The exception is that when layer 2 only contains a single key, it could be split over the LSN range. For example, +/// +/// ```plain +/// | | | 2 | | | +/// | 1 | |-------| | 3 | +/// | | | 4 | | | +/// +/// If layer 2 and 4 contain the same single key, this is also a valid layer map. +pub fn check_valid_layermap(metadata: &[LayerName]) -> Option { + let mut lsn_split_point = BTreeSet::new(); // TODO: use a better data structure (range tree / range set?) + let mut all_delta_layers = Vec::new(); + for name in metadata { + if let LayerName::Delta(layer) = name { + if layer.key_range.start.next() != layer.key_range.end { + all_delta_layers.push(layer.clone()); + } + } + } + for layer in &all_delta_layers { + let lsn_range = &layer.lsn_range; + lsn_split_point.insert(lsn_range.start); + lsn_split_point.insert(lsn_range.end); + } + for layer in &all_delta_layers { + let lsn_range = layer.lsn_range.clone(); + let intersects = lsn_split_point.range(lsn_range).collect_vec(); + if intersects.len() > 1 { + let err = format!( + "layer violates the layer map LSN split assumption: layer {} intersects with LSN [{}]", + layer, + intersects.into_iter().map(|lsn| lsn.to_string()).join(", ") + ); + return Some(err); + } + } + None +} diff --git a/pageserver/src/tenant/gc_block.rs b/pageserver/src/tenant/gc_block.rs index 8b41ba1746..1271d25b76 100644 --- a/pageserver/src/tenant/gc_block.rs +++ b/pageserver/src/tenant/gc_block.rs @@ -1,11 +1,29 @@ -use std::collections::HashMap; - -use utils::id::TimelineId; +use std::{collections::HashMap, time::Duration}; use 
super::remote_timeline_client::index::GcBlockingReason; +use tokio::time::Instant; +use utils::id::TimelineId; -type Storage = HashMap>; +type TimelinesBlocked = HashMap>; +#[derive(Default)] +struct Storage { + timelines_blocked: TimelinesBlocked, + /// The deadline before which we are blocked from GC so that + /// leases have a chance to be renewed. + lsn_lease_deadline: Option, +} + +impl Storage { + fn is_blocked_by_lsn_lease_deadline(&self) -> bool { + self.lsn_lease_deadline + .map(|d| Instant::now() < d) + .unwrap_or(false) + } +} + +/// GcBlock provides persistent (per-timeline) gc blocking and facilitates transient time based gc +/// blocking. #[derive(Default)] pub(crate) struct GcBlock { /// The timelines which have current reasons to block gc. @@ -13,6 +31,12 @@ pub(crate) struct GcBlock { /// LOCK ORDER: this is held locked while scheduling the next index_part update. This is done /// to keep the this field up to date with RemoteTimelineClient `upload_queue.dirty`. reasons: std::sync::Mutex, + + /// GC background task or manually run `Tenant::gc_iteration` holds a lock on this. + /// + /// Do not add any more features taking and forbidding taking this lock. It should be + /// `tokio::sync::Notify`, but that is rarely used. On the other side, [`GcBlock::insert`] + /// synchronizes with gc attempts by locking and unlocking this mutex. blocking: tokio::sync::Mutex<()>, } @@ -42,6 +66,20 @@ impl GcBlock { } } + /// Sets a deadline before which we cannot proceed to GC due to lsn lease. + /// + /// We do this as the leases mapping are not persisted to disk. By delaying GC by lease + /// length, we guarantee that all the leases we granted before will have a chance to renew + /// when we run GC for the first time after restart / transition from AttachedMulti to AttachedSingle. 
+ pub(super) fn set_lsn_lease_deadline(&self, lsn_lease_length: Duration) { + let deadline = Instant::now() + lsn_lease_length; + let mut g = self.reasons.lock().unwrap(); + g.lsn_lease_deadline = Some(deadline); + } + + /// Describe the current gc blocking reasons. + /// + /// TODO: make this json serializable. pub(crate) fn summary(&self) -> Option { let g = self.reasons.lock().unwrap(); @@ -64,7 +102,7 @@ impl GcBlock { ) -> anyhow::Result { let (added, uploaded) = { let mut g = self.reasons.lock().unwrap(); - let set = g.entry(timeline.timeline_id).or_default(); + let set = g.timelines_blocked.entry(timeline.timeline_id).or_default(); let added = set.insert(reason); // LOCK ORDER: intentionally hold the lock, see self.reasons. @@ -95,7 +133,7 @@ impl GcBlock { let (remaining_blocks, uploaded) = { let mut g = self.reasons.lock().unwrap(); - match g.entry(timeline.timeline_id) { + match g.timelines_blocked.entry(timeline.timeline_id) { Entry::Occupied(mut oe) => { let set = oe.get_mut(); set.remove(reason); @@ -109,7 +147,7 @@ impl GcBlock { } } - let remaining_blocks = g.len(); + let remaining_blocks = g.timelines_blocked.len(); // LOCK ORDER: intentionally hold the lock while scheduling; see self.reasons let uploaded = timeline @@ -134,11 +172,11 @@ impl GcBlock { pub(crate) fn before_delete(&self, timeline: &super::Timeline) { let unblocked = { let mut g = self.reasons.lock().unwrap(); - if g.is_empty() { + if g.timelines_blocked.is_empty() { return; } - g.remove(&timeline.timeline_id); + g.timelines_blocked.remove(&timeline.timeline_id); BlockingReasons::clean_and_summarize(g).is_none() }; @@ -149,10 +187,11 @@ impl GcBlock { } /// Initialize with the non-deleted timelines of this tenant. 
- pub(crate) fn set_scanned(&self, scanned: Storage) { + pub(crate) fn set_scanned(&self, scanned: TimelinesBlocked) { let mut g = self.reasons.lock().unwrap(); - assert!(g.is_empty()); - g.extend(scanned.into_iter().filter(|(_, v)| !v.is_empty())); + assert!(g.timelines_blocked.is_empty()); + g.timelines_blocked + .extend(scanned.into_iter().filter(|(_, v)| !v.is_empty())); if let Some(reasons) = BlockingReasons::clean_and_summarize(g) { tracing::info!(summary=?reasons, "initialized with gc blocked"); @@ -166,6 +205,7 @@ pub(super) struct Guard<'a> { #[derive(Debug)] pub(crate) struct BlockingReasons { + tenant_blocked_by_lsn_lease_deadline: bool, timelines: usize, reasons: enumset::EnumSet, } @@ -174,8 +214,8 @@ impl std::fmt::Display for BlockingReasons { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, - "{} timelines block for {:?}", - self.timelines, self.reasons + "tenant_blocked_by_lsn_lease_deadline: {}, {} timelines block for {:?}", + self.tenant_blocked_by_lsn_lease_deadline, self.timelines, self.reasons ) } } @@ -183,13 +223,15 @@ impl std::fmt::Display for BlockingReasons { impl BlockingReasons { fn clean_and_summarize(mut g: std::sync::MutexGuard<'_, Storage>) -> Option { let mut reasons = enumset::EnumSet::empty(); - g.retain(|_key, value| { + g.timelines_blocked.retain(|_key, value| { reasons = reasons.union(*value); !value.is_empty() }); - if !g.is_empty() { + let blocked_by_lsn_lease_deadline = g.is_blocked_by_lsn_lease_deadline(); + if !g.timelines_blocked.is_empty() || blocked_by_lsn_lease_deadline { Some(BlockingReasons { - timelines: g.len(), + tenant_blocked_by_lsn_lease_deadline: blocked_by_lsn_lease_deadline, + timelines: g.timelines_blocked.len(), reasons, }) } else { @@ -198,14 +240,17 @@ impl BlockingReasons { } fn summarize(g: &std::sync::MutexGuard<'_, Storage>) -> Option { - if g.is_empty() { + let blocked_by_lsn_lease_deadline = g.is_blocked_by_lsn_lease_deadline(); + if g.timelines_blocked.is_empty() 
&& !blocked_by_lsn_lease_deadline { None } else { let reasons = g + .timelines_blocked .values() .fold(enumset::EnumSet::empty(), |acc, next| acc.union(*next)); Some(BlockingReasons { - timelines: g.len(), + tenant_blocked_by_lsn_lease_deadline: blocked_by_lsn_lease_deadline, + timelines: g.timelines_blocked.len(), reasons, }) } diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 2104f41531..1e7c1e10a5 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -949,6 +949,12 @@ impl TenantManager { (LocationMode::Attached(attach_conf), Some(TenantSlot::Attached(tenant))) => { match attach_conf.generation.cmp(&tenant.generation) { Ordering::Equal => { + if attach_conf.attach_mode == AttachmentMode::Single { + tenant + .gc_block + .set_lsn_lease_deadline(tenant.get_lsn_lease_length()); + } + // A transition from Attached to Attached in the same generation, we may // take our fast path and just provide the updated configuration // to the tenant. diff --git a/pageserver/src/tenant/storage_layer.rs b/pageserver/src/tenant/storage_layer.rs index dac6b2f893..99bd0ece57 100644 --- a/pageserver/src/tenant/storage_layer.rs +++ b/pageserver/src/tenant/storage_layer.rs @@ -1,13 +1,13 @@ //! Common traits and structs for layers pub mod delta_layer; +pub mod filter_iterator; pub mod image_layer; pub mod inmemory_layer; pub(crate) mod layer; mod layer_desc; mod layer_name; pub mod merge_iterator; - pub mod split_writer; use crate::context::{AccessStatsBehavior, RequestContext}; @@ -276,6 +276,16 @@ pub(crate) enum LayerId { InMemoryLayerId(InMemoryLayerFileId), } +/// Uniquely identify a layer visit by the layer +/// and LSN floor (or start LSN) of the reads. +/// The layer itself is not enough since we may +/// have different LSN lower bounds for delta layer reads. +#[derive(Debug, PartialEq, Eq, Clone, Hash)] +struct LayerToVisitId { + layer_id: LayerId, + lsn_floor: Lsn, +} + /// Layer wrapper for the read path. 
Note that it is valid /// to use these layers even after external operations have /// been performed on them (compaction, freeze, etc.). @@ -287,9 +297,9 @@ pub(crate) enum ReadableLayer { /// A partial description of a read to be done. #[derive(Debug, Clone)] -struct ReadDesc { +struct LayerVisit { /// An id used to resolve the readable layer within the fringe - layer_id: LayerId, + layer_to_visit_id: LayerToVisitId, /// Lsn range for the read, used for selecting the next read lsn_range: Range, } @@ -303,12 +313,12 @@ struct ReadDesc { /// a two layer indexing scheme. #[derive(Debug)] pub(crate) struct LayerFringe { - planned_reads_by_lsn: BinaryHeap, - layers: HashMap, + planned_visits_by_lsn: BinaryHeap, + visit_reads: HashMap, } #[derive(Debug)] -struct LayerKeyspace { +struct LayerVisitReads { layer: ReadableLayer, target_keyspace: KeySpaceRandomAccum, } @@ -316,23 +326,23 @@ struct LayerKeyspace { impl LayerFringe { pub(crate) fn new() -> Self { LayerFringe { - planned_reads_by_lsn: BinaryHeap::new(), - layers: HashMap::new(), + planned_visits_by_lsn: BinaryHeap::new(), + visit_reads: HashMap::new(), } } pub(crate) fn next_layer(&mut self) -> Option<(ReadableLayer, KeySpace, Range)> { - let read_desc = match self.planned_reads_by_lsn.pop() { + let read_desc = match self.planned_visits_by_lsn.pop() { Some(desc) => desc, None => return None, }; - let removed = self.layers.remove_entry(&read_desc.layer_id); + let removed = self.visit_reads.remove_entry(&read_desc.layer_to_visit_id); match removed { Some(( _, - LayerKeyspace { + LayerVisitReads { layer, mut target_keyspace, }, @@ -351,20 +361,24 @@ impl LayerFringe { keyspace: KeySpace, lsn_range: Range, ) { - let layer_id = layer.id(); - let entry = self.layers.entry(layer_id.clone()); + let layer_to_visit_id = LayerToVisitId { + layer_id: layer.id(), + lsn_floor: lsn_range.start, + }; + + let entry = self.visit_reads.entry(layer_to_visit_id.clone()); match entry { Entry::Occupied(mut entry) => { 
entry.get_mut().target_keyspace.add_keyspace(keyspace); } Entry::Vacant(entry) => { - self.planned_reads_by_lsn.push(ReadDesc { + self.planned_visits_by_lsn.push(LayerVisit { lsn_range, - layer_id: layer_id.clone(), + layer_to_visit_id: layer_to_visit_id.clone(), }); let mut accum = KeySpaceRandomAccum::new(); accum.add_keyspace(keyspace); - entry.insert(LayerKeyspace { + entry.insert(LayerVisitReads { layer, target_keyspace: accum, }); @@ -379,7 +393,7 @@ impl Default for LayerFringe { } } -impl Ord for ReadDesc { +impl Ord for LayerVisit { fn cmp(&self, other: &Self) -> Ordering { let ord = self.lsn_range.end.cmp(&other.lsn_range.end); if ord == std::cmp::Ordering::Equal { @@ -390,19 +404,19 @@ impl Ord for ReadDesc { } } -impl PartialOrd for ReadDesc { +impl PartialOrd for LayerVisit { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } -impl PartialEq for ReadDesc { +impl PartialEq for LayerVisit { fn eq(&self, other: &Self) -> bool { self.lsn_range == other.lsn_range } } -impl Eq for ReadDesc {} +impl Eq for LayerVisit {} impl ReadableLayer { pub(crate) fn id(&self) -> LayerId { diff --git a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs index 34f1b15138..2b212cfed5 100644 --- a/pageserver/src/tenant/storage_layer/delta_layer.rs +++ b/pageserver/src/tenant/storage_layer/delta_layer.rs @@ -39,7 +39,7 @@ use crate::tenant::disk_btree::{ use crate::tenant::storage_layer::layer::S3_UPLOAD_LIMIT; use crate::tenant::timeline::GetVectoredError; use crate::tenant::vectored_blob_io::{ - BlobFlag, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead, + BlobFlag, BufView, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead, VectoredReadCoalesceMode, VectoredReadPlanner, }; use crate::tenant::PageReconstructError; @@ -1021,13 +1021,30 @@ impl DeltaLayerInner { continue; } }; - + let view = BufView::new_slice(&blobs_buf.buf); for meta in blobs_buf.blobs.iter().rev() { if 
Some(meta.meta.key) == ignore_key_with_err { continue; } + let blob_read = meta.read(&view).await; + let blob_read = match blob_read { + Ok(buf) => buf, + Err(e) => { + reconstruct_state.on_key_error( + meta.meta.key, + PageReconstructError::Other(anyhow!(e).context(format!( + "Failed to decompress blob from virtual file {}", + self.file.path, + ))), + ); + + ignore_key_with_err = Some(meta.meta.key); + continue; + } + }; + + let value = Value::des(&blob_read); - let value = Value::des(&blobs_buf.buf[meta.start..meta.end]); let value = match value { Ok(v) => v, Err(e) => { @@ -1243,21 +1260,21 @@ impl DeltaLayerInner { buf.reserve(read.size()); let res = reader.read_blobs(&read, buf, ctx).await?; + let view = BufView::new_slice(&res.buf); + for blob in res.blobs { let key = blob.meta.key; let lsn = blob.meta.lsn; - let data = &res.buf[blob.start..blob.end]; + + let data = blob.read(&view).await?; #[cfg(debug_assertions)] - Value::des(data) + Value::des(&data) .with_context(|| { format!( - "blob failed to deserialize for {}@{}, {}..{}: {:?}", - blob.meta.key, - blob.meta.lsn, - blob.start, - blob.end, - utils::Hex(data) + "blob failed to deserialize for {}: {:?}", + blob, + utils::Hex(&data) ) }) .unwrap(); @@ -1265,15 +1282,15 @@ impl DeltaLayerInner { // is it an image or will_init walrecord? 
// FIXME: this could be handled by threading the BlobRef to the // VectoredReadBuilder - let will_init = crate::repository::ValueBytes::will_init(data) + let will_init = crate::repository::ValueBytes::will_init(&data) .inspect_err(|_e| { #[cfg(feature = "testing")] - tracing::error!(data=?utils::Hex(data), err=?_e, %key, %lsn, "failed to parse will_init out of serialized value"); + tracing::error!(data=?utils::Hex(&data), err=?_e, %key, %lsn, "failed to parse will_init out of serialized value"); }) .unwrap_or(false); per_blob_copy.clear(); - per_blob_copy.extend_from_slice(data); + per_blob_copy.extend_from_slice(&data); let (tmp, res) = writer .put_value_bytes( @@ -1538,8 +1555,11 @@ impl<'a> DeltaLayerIterator<'a> { .read_blobs(&plan, buf, self.ctx) .await?; let frozen_buf = blobs_buf.buf.freeze(); + let view = BufView::new_bytes(frozen_buf); for meta in blobs_buf.blobs.iter() { - let value = Value::des(&frozen_buf[meta.start..meta.end])?; + let blob_read = meta.read(&view).await?; + let value = Value::des(&blob_read)?; + next_batch.push_back((meta.meta.key, meta.meta.lsn, value)); } self.key_values_batch = next_batch; @@ -1916,9 +1936,13 @@ pub(crate) mod test { let blobs_buf = vectored_blob_reader .read_blobs(&read, buf.take().expect("Should have a buffer"), &ctx) .await?; + let view = BufView::new_slice(&blobs_buf.buf); for meta in blobs_buf.blobs.iter() { - let value = &blobs_buf.buf[meta.start..meta.end]; - assert_eq!(value, entries_meta.index[&(meta.meta.key, meta.meta.lsn)]); + let value = meta.read(&view).await?; + assert_eq!( + &value[..], + &entries_meta.index[&(meta.meta.key, meta.meta.lsn)] + ); } buf = Some(blobs_buf.buf); diff --git a/pageserver/src/tenant/storage_layer/filter_iterator.rs b/pageserver/src/tenant/storage_layer/filter_iterator.rs new file mode 100644 index 0000000000..f45dd4b801 --- /dev/null +++ b/pageserver/src/tenant/storage_layer/filter_iterator.rs @@ -0,0 +1,205 @@ +use std::ops::Range; + +use anyhow::bail; +use pageserver_api::{ 
+ key::Key, + keyspace::{KeySpace, SparseKeySpace}, +}; +use utils::lsn::Lsn; + +use crate::repository::Value; + +use super::merge_iterator::MergeIterator; + +/// A filter iterator over merge iterators (and can be easily extended to other types of iterators). +/// +/// The iterator will skip any keys not included in the keyspace filter. In other words, the keyspace filter contains the keys +/// to be retained. +pub struct FilterIterator<'a> { + inner: MergeIterator<'a>, + retain_key_filters: Vec>, + current_filter_idx: usize, +} + +impl<'a> FilterIterator<'a> { + pub fn create( + inner: MergeIterator<'a>, + dense_keyspace: KeySpace, + sparse_keyspace: SparseKeySpace, + ) -> anyhow::Result { + let mut retain_key_filters = Vec::new(); + retain_key_filters.extend(dense_keyspace.ranges); + retain_key_filters.extend(sparse_keyspace.0.ranges); + retain_key_filters.sort_by(|a, b| a.start.cmp(&b.start)); + // Verify key filters are non-overlapping and sorted + for window in retain_key_filters.windows(2) { + if window[0].end > window[1].start { + bail!( + "Key filters are overlapping: {:?} and {:?}", + window[0], + window[1] + ); + } + } + Ok(Self { + inner, + retain_key_filters, + current_filter_idx: 0, + }) + } + + pub async fn next(&mut self) -> anyhow::Result> { + while let Some(item) = self.inner.next().await? 
{ + while self.current_filter_idx < self.retain_key_filters.len() + && item.0 >= self.retain_key_filters[self.current_filter_idx].end + { + // [filter region] [filter region] [filter region] + // ^ item + // ^ current filter + self.current_filter_idx += 1; + // [filter region] [filter region] [filter region] + // ^ item + // ^ current filter + } + if self.current_filter_idx >= self.retain_key_filters.len() { + // We already exhausted all filters, so we should return now + // [filter region] [filter region] [filter region] + // ^ item + // ^ current filter (nothing) + return Ok(None); + } + if self.retain_key_filters[self.current_filter_idx].contains(&item.0) { + // [filter region] [filter region] [filter region] + // ^ item + // ^ current filter + return Ok(Some(item)); + } + // If the key is not contained in the key retaining filters, continue to the next item. + // [filter region] [filter region] [filter region] + // ^ item + // ^ current filter + } + Ok(None) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use itertools::Itertools; + use pageserver_api::key::Key; + use utils::lsn::Lsn; + + use crate::{ + tenant::{ + harness::{TenantHarness, TIMELINE_ID}, + storage_layer::delta_layer::test::produce_delta_layer, + }, + DEFAULT_PG_VERSION, + }; + + async fn assert_filter_iter_equal( + filter_iter: &mut FilterIterator<'_>, + expect: &[(Key, Lsn, Value)], + ) { + let mut expect_iter = expect.iter(); + loop { + let o1 = filter_iter.next().await.unwrap(); + let o2 = expect_iter.next(); + assert_eq!(o1.is_some(), o2.is_some()); + if o1.is_none() && o2.is_none() { + break; + } + let (k1, l1, v1) = o1.unwrap(); + let (k2, l2, v2) = o2.unwrap(); + assert_eq!(&k1, k2); + assert_eq!(l1, *l2); + assert_eq!(&v1, v2); + } + } + + #[tokio::test] + async fn filter_keyspace_iterator() { + use crate::repository::Value; + use bytes::Bytes; + + let harness = TenantHarness::create("filter_iterator_filter_keyspace_iterator") + .await + .unwrap(); + let (tenant, ctx) = 
harness.load().await; + + let tline = tenant + .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx) + .await + .unwrap(); + + fn get_key(id: u32) -> Key { + let mut key = Key::from_hex("000000000033333333444444445500000000").unwrap(); + key.field6 = id; + key + } + const N: usize = 100; + let test_deltas1 = (0..N) + .map(|idx| { + ( + get_key(idx as u32), + Lsn(0x20 * ((idx as u64) % 10 + 1)), + Value::Image(Bytes::from(format!("img{idx:05}"))), + ) + }) + .collect_vec(); + let resident_layer_1 = produce_delta_layer(&tenant, &tline, test_deltas1.clone(), &ctx) + .await + .unwrap(); + + let merge_iter = MergeIterator::create( + &[resident_layer_1.get_as_delta(&ctx).await.unwrap()], + &[], + &ctx, + ); + + let mut filter_iter = FilterIterator::create( + merge_iter, + KeySpace { + ranges: vec![ + get_key(5)..get_key(10), + get_key(20)..get_key(30), + get_key(90)..get_key(110), + get_key(1000)..get_key(2000), + ], + }, + SparseKeySpace(KeySpace::default()), + ) + .unwrap(); + let mut result = Vec::new(); + result.extend(test_deltas1[5..10].iter().cloned()); + result.extend(test_deltas1[20..30].iter().cloned()); + result.extend(test_deltas1[90..100].iter().cloned()); + assert_filter_iter_equal(&mut filter_iter, &result).await; + + let merge_iter = MergeIterator::create( + &[resident_layer_1.get_as_delta(&ctx).await.unwrap()], + &[], + &ctx, + ); + + let mut filter_iter = FilterIterator::create( + merge_iter, + KeySpace { + ranges: vec![ + get_key(0)..get_key(10), + get_key(20)..get_key(30), + get_key(90)..get_key(95), + ], + }, + SparseKeySpace(KeySpace::default()), + ) + .unwrap(); + let mut result = Vec::new(); + result.extend(test_deltas1[0..10].iter().cloned()); + result.extend(test_deltas1[20..30].iter().cloned()); + result.extend(test_deltas1[90..95].iter().cloned()); + assert_filter_iter_equal(&mut filter_iter, &result).await; + } +} diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs 
b/pageserver/src/tenant/storage_layer/image_layer.rs index 5de2582ab7..940d169db0 100644 --- a/pageserver/src/tenant/storage_layer/image_layer.rs +++ b/pageserver/src/tenant/storage_layer/image_layer.rs @@ -36,7 +36,8 @@ use crate::tenant::disk_btree::{ }; use crate::tenant::timeline::GetVectoredError; use crate::tenant::vectored_blob_io::{ - BlobFlag, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead, VectoredReadPlanner, + BlobFlag, BufView, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead, + VectoredReadPlanner, }; use crate::tenant::PageReconstructError; use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt; @@ -547,15 +548,15 @@ impl ImageLayerInner { let buf = BytesMut::with_capacity(buf_size); let blobs_buf = vectored_blob_reader.read_blobs(&read, buf, ctx).await?; - let frozen_buf = blobs_buf.buf.freeze(); + let view = BufView::new_bytes(frozen_buf); for meta in blobs_buf.blobs.iter() { - let img_buf = frozen_buf.slice(meta.start..meta.end); + let img_buf = meta.read(&view).await?; key_count += 1; writer - .put_image(meta.meta.key, img_buf, ctx) + .put_image(meta.meta.key, img_buf.into_bytes(), ctx) .await .context(format!("Storing key {}", meta.meta.key))?; } @@ -602,13 +603,28 @@ impl ImageLayerInner { match res { Ok(blobs_buf) => { let frozen_buf = blobs_buf.buf.freeze(); - + let view = BufView::new_bytes(frozen_buf); for meta in blobs_buf.blobs.iter() { - let img_buf = frozen_buf.slice(meta.start..meta.end); + let img_buf = meta.read(&view).await; + + let img_buf = match img_buf { + Ok(img_buf) => img_buf, + Err(e) => { + reconstruct_state.on_key_error( + meta.meta.key, + PageReconstructError::Other(anyhow!(e).context(format!( + "Failed to decompress blob from virtual file {}", + self.file.path, + ))), + ); + + continue; + } + }; reconstruct_state.update_key( &meta.meta.key, self.lsn, - Value::Image(img_buf), + Value::Image(img_buf.into_bytes()), ); } } @@ -1025,10 +1041,15 @@ impl<'a> ImageLayerIterator<'a> { let 
blobs_buf = vectored_blob_reader .read_blobs(&plan, buf, self.ctx) .await?; - let frozen_buf: Bytes = blobs_buf.buf.freeze(); + let frozen_buf = blobs_buf.buf.freeze(); + let view = BufView::new_bytes(frozen_buf); for meta in blobs_buf.blobs.iter() { - let img_buf = frozen_buf.slice(meta.start..meta.end); - next_batch.push_back((meta.meta.key, self.image_layer.lsn, Value::Image(img_buf))); + let img_buf = meta.read(&view).await?; + next_batch.push_back(( + meta.meta.key, + self.image_layer.lsn, + Value::Image(img_buf.into_bytes()), + )); } self.key_values_batch = next_batch; Ok(()) diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs index b15cd4da39..f0e2ca5c83 100644 --- a/pageserver/src/tenant/storage_layer/layer.rs +++ b/pageserver/src/tenant/storage_layer/layer.rs @@ -439,11 +439,30 @@ impl Layer { fn record_access(&self, ctx: &RequestContext) { if self.0.access_stats.record_access(ctx) { - // Visibility was modified to Visible - tracing::info!( - "Layer {} became visible as a result of access", - self.0.desc.key() - ); + // Visibility was modified to Visible: maybe log about this + match ctx.task_kind() { + TaskKind::CalculateSyntheticSize + | TaskKind::GarbageCollector + | TaskKind::MgmtRequest => { + // This situation is expected in code paths do binary searches of the LSN space to resolve + // an LSN to a timestamp, which happens during GC, during GC cutoff calculations in synthetic size, + // and on-demand for certain HTTP API requests. + } + _ => { + // In all other contexts, it is unusual to do I/O involving layers which are not visible at + // some branch tip, so we log the fact that we are accessing something that the visibility + // calculation thought should not be visible. + // + // This case is legal in brief time windows: for example an in-flight getpage request can hold on to a layer object + // which was covered by a concurrent compaction. 
+ tracing::info!( + "Layer {} became visible as a result of access", + self.0.desc.key() + ); + } + } + + // Update the timeline's visible bytes count if let Some(tl) = self.0.timeline.upgrade() { tl.metrics .visible_physical_size_gauge diff --git a/pageserver/src/tenant/storage_layer/layer/tests.rs b/pageserver/src/tenant/storage_layer/layer/tests.rs index 0b9bde4f57..9de70f14ee 100644 --- a/pageserver/src/tenant/storage_layer/layer/tests.rs +++ b/pageserver/src/tenant/storage_layer/layer/tests.rs @@ -1025,6 +1025,15 @@ fn access_stats() { assert_eq!(access_stats.latest_activity(), lowres_time(atime)); access_stats.set_visibility(LayerVisibilityHint::Visible); assert_eq!(access_stats.latest_activity(), lowres_time(atime)); + + // Recording access implicitly makes layer visible, if it wasn't already + let atime = UNIX_EPOCH + Duration::from_secs(2200000000); + access_stats.set_visibility(LayerVisibilityHint::Covered); + assert_eq!(access_stats.visibility(), LayerVisibilityHint::Covered); + assert!(access_stats.record_access_at(atime)); + access_stats.set_visibility(LayerVisibilityHint::Visible); + assert!(!access_stats.record_access_at(atime)); + access_stats.set_visibility(LayerVisibilityHint::Visible); } #[test] diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs index 478e9bb4f0..3f0f8a21c8 100644 --- a/pageserver/src/tenant/tasks.rs +++ b/pageserver/src/tenant/tasks.rs @@ -163,8 +163,6 @@ async fn compaction_loop(tenant: Arc, cancel: CancellationToken) { // How many errors we have seen consequtively let mut error_run_count = 0; - let mut last_throttle_flag_reset_at = Instant::now(); - TENANT_TASK_EVENTS.with_label_values(&["start"]).inc(); async { let ctx = RequestContext::todo_child(TaskKind::Compaction, DownloadBehavior::Download); @@ -191,8 +189,6 @@ async fn compaction_loop(tenant: Arc, cancel: CancellationToken) { } } - - let sleep_duration; if period == Duration::ZERO { #[cfg(not(feature = "testing"))] @@ -207,12 +203,18 @@ async 
fn compaction_loop(tenant: Arc, cancel: CancellationToken) { }; // Run compaction - let IterationResult { output, elapsed } = iteration.run(tenant.compaction_iteration(&cancel, &ctx)).await; + let IterationResult { output, elapsed } = iteration + .run(tenant.compaction_iteration(&cancel, &ctx)) + .await; match output { Ok(has_pending_task) => { error_run_count = 0; // schedule the next compaction immediately in case there is a pending compaction task - sleep_duration = if has_pending_task { Duration::ZERO } else { period }; + sleep_duration = if has_pending_task { + Duration::ZERO + } else { + period + }; } Err(e) => { let wait_duration = backoff::exponential_backoff_duration_seconds( @@ -233,38 +235,20 @@ async fn compaction_loop(tenant: Arc, cancel: CancellationToken) { } // the duration is recorded by performance tests by enabling debug in this function - tracing::debug!(elapsed_ms=elapsed.as_millis(), "compaction iteration complete"); + tracing::debug!( + elapsed_ms = elapsed.as_millis(), + "compaction iteration complete" + ); }; - // Perhaps we did no work and the walredo process has been idle for some time: // give it a chance to shut down to avoid leaving walredo process running indefinitely. + // TODO: move this to a separate task (housekeeping loop) that isn't affected by the back-off, + // so we get some upper bound guarantee on when walredo quiesce / this throttling reporting here happens. if let Some(walredo_mgr) = &tenant.walredo_mgr { walredo_mgr.maybe_quiesce(period * 10); } - // TODO: move this (and walredo quiesce) to a separate task that isn't affected by the back-off, - // so we get some upper bound guarantee on when walredo quiesce / this throttling reporting here happens. 
- info_span!(parent: None, "timeline_get_throttle", tenant_id=%tenant.tenant_shard_id, shard_id=%tenant.tenant_shard_id.shard_slug()).in_scope(|| { - let now = Instant::now(); - let prev = std::mem::replace(&mut last_throttle_flag_reset_at, now); - let Stats { count_accounted, count_throttled, sum_throttled_usecs } = tenant.timeline_get_throttle.reset_stats(); - if count_throttled == 0 { - return; - } - let allowed_rps = tenant.timeline_get_throttle.steady_rps(); - let delta = now - prev; - info!( - n_seconds=%format_args!("{:.3}", - delta.as_secs_f64()), - count_accounted, - count_throttled, - sum_throttled_usecs, - allowed_rps=%format_args!("{allowed_rps:.0}"), - "shard was throttled in the last n_seconds" - ); - }); - // Sleep if tokio::time::timeout(sleep_duration, cancel.cancelled()) .await @@ -346,6 +330,7 @@ async fn gc_loop(tenant: Arc, cancel: CancellationToken) { RequestContext::todo_child(TaskKind::GarbageCollector, DownloadBehavior::Download); let mut first = true; + tenant.gc_block.set_lsn_lease_deadline(tenant.get_lsn_lease_length()); loop { tokio::select! { _ = cancel.cancelled() => { @@ -363,7 +348,6 @@ async fn gc_loop(tenant: Arc, cancel: CancellationToken) { first = false; let delays = async { - delay_by_lease_length(tenant.get_lsn_lease_length(), &cancel).await?; random_init_delay(period, &cancel).await?; Ok::<_, Cancelled>(()) }; @@ -437,6 +421,7 @@ async fn gc_loop(tenant: Arc, cancel: CancellationToken) { async fn ingest_housekeeping_loop(tenant: Arc, cancel: CancellationToken) { TENANT_TASK_EVENTS.with_label_values(&["start"]).inc(); async { + let mut last_throttle_flag_reset_at = Instant::now(); loop { tokio::select! { _ = cancel.cancelled() => { @@ -483,6 +468,28 @@ async fn ingest_housekeeping_loop(tenant: Arc, cancel: CancellationToken kind: BackgroundLoopKind::IngestHouseKeeping, }; iteration.run(tenant.ingest_housekeeping()).await; + + // TODO: rename the background loop kind to something more generic, like, tenant housekeeping. 
+ // Or just spawn another background loop for this throttle, it's not like it's super costly. + info_span!(parent: None, "timeline_get_throttle", tenant_id=%tenant.tenant_shard_id, shard_id=%tenant.tenant_shard_id.shard_slug()).in_scope(|| { + let now = Instant::now(); + let prev = std::mem::replace(&mut last_throttle_flag_reset_at, now); + let Stats { count_accounted_start, count_accounted_finish, count_throttled, sum_throttled_usecs} = tenant.timeline_get_throttle.reset_stats(); + if count_throttled == 0 { + return; + } + let allowed_rps = tenant.timeline_get_throttle.steady_rps(); + let delta = now - prev; + info!( + n_seconds=%format_args!("{:.3}", delta.as_secs_f64()), + count_accounted = count_accounted_finish, // don't break existing log scraping + count_throttled, + sum_throttled_usecs, + count_accounted_start, // log after pre-existing fields to not break existing log scraping + allowed_rps=%format_args!("{allowed_rps:.0}"), + "shard was throttled in the last n_seconds" + ); + }); } } .await; @@ -538,28 +545,12 @@ pub(crate) async fn random_init_delay( let mut rng = rand::thread_rng(); rng.gen_range(Duration::ZERO..=period) }; - match tokio::time::timeout(d, cancel.cancelled()).await { Ok(_) => Err(Cancelled), Err(_) => Ok(()), } } -/// Delays GC by defaul lease length at restart. -/// -/// We do this as the leases mapping are not persisted to disk. By delaying GC by default -/// length, we gurantees that all the leases we granted before the restart will expire -/// when we run GC for the first time after the restart. 
-pub(crate) async fn delay_by_lease_length( - length: Duration, - cancel: &CancellationToken, -) -> Result<(), Cancelled> { - match tokio::time::timeout(length, cancel.cancelled()).await { - Ok(_) => Err(Cancelled), - Err(_) => Ok(()), - } -} - struct Iteration { started_at: Instant, period: Duration, diff --git a/pageserver/src/tenant/throttle.rs b/pageserver/src/tenant/throttle.rs index f222e708e1..6a80953901 100644 --- a/pageserver/src/tenant/throttle.rs +++ b/pageserver/src/tenant/throttle.rs @@ -24,8 +24,10 @@ use crate::{context::RequestContext, task_mgr::TaskKind}; pub struct Throttle { inner: ArcSwap, metric: M, - /// will be turned into [`Stats::count_accounted`] - count_accounted: AtomicU64, + /// will be turned into [`Stats::count_accounted_start`] + count_accounted_start: AtomicU64, + /// will be turned into [`Stats::count_accounted_finish`] + count_accounted_finish: AtomicU64, /// will be turned into [`Stats::count_throttled`] count_throttled: AtomicU64, /// will be turned into [`Stats::sum_throttled_usecs`] @@ -43,17 +45,21 @@ pub struct Observation { pub wait_time: Duration, } pub trait Metric { + fn accounting_start(&self); + fn accounting_finish(&self); fn observe_throttling(&self, observation: &Observation); } /// See [`Throttle::reset_stats`]. pub struct Stats { - // Number of requests that were subject to throttling, i.e., requests of the configured [`Config::task_kinds`]. - pub count_accounted: u64, - // Subset of the `accounted` requests that were actually throttled. - // Note that the numbers are stored as two independent atomics, so, there might be a slight drift. + /// Number of requests that started [`Throttle::throttle`] calls. + pub count_accounted_start: u64, + /// Number of requests that finished [`Throttle::throttle`] calls. + pub count_accounted_finish: u64, + /// Subset of the `accounted` requests that were actually throttled. + /// Note that the numbers are stored as two independent atomics, so, there might be a slight drift. 
pub count_throttled: u64, - // Sum of microseconds that throttled requests spent waiting for throttling. + /// Sum of microseconds that throttled requests spent waiting for throttling. pub sum_throttled_usecs: u64, } @@ -65,7 +71,8 @@ where Self { inner: ArcSwap::new(Arc::new(Self::new_inner(config))), metric, - count_accounted: AtomicU64::new(0), + count_accounted_start: AtomicU64::new(0), + count_accounted_finish: AtomicU64::new(0), count_throttled: AtomicU64::new(0), sum_throttled_usecs: AtomicU64::new(0), } @@ -117,11 +124,13 @@ where /// This method allows retrieving & resetting that flag. /// Useful for periodic reporting. pub fn reset_stats(&self) -> Stats { - let count_accounted = self.count_accounted.swap(0, Ordering::Relaxed); + let count_accounted_start = self.count_accounted_start.swap(0, Ordering::Relaxed); + let count_accounted_finish = self.count_accounted_finish.swap(0, Ordering::Relaxed); let count_throttled = self.count_throttled.swap(0, Ordering::Relaxed); let sum_throttled_usecs = self.sum_throttled_usecs.swap(0, Ordering::Relaxed); Stats { - count_accounted, + count_accounted_start, + count_accounted_finish, count_throttled, sum_throttled_usecs, } @@ -139,9 +148,12 @@ where }; let start = std::time::Instant::now(); + self.metric.accounting_start(); + self.count_accounted_start.fetch_add(1, Ordering::Relaxed); let did_throttle = inner.rate_limiter.acquire(key_count).await; + self.count_accounted_finish.fetch_add(1, Ordering::Relaxed); + self.metric.accounting_finish(); - self.count_accounted.fetch_add(1, Ordering::Relaxed); if did_throttle { self.count_throttled.fetch_add(1, Ordering::Relaxed); let now = Instant::now(); diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index f66491d962..157c6ab91e 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -112,7 +112,7 @@ use pageserver_api::reltag::RelTag; use pageserver_api::shard::ShardIndex; use 
postgres_connection::PgConnectionConfig; -use postgres_ffi::to_pg_timestamp; +use postgres_ffi::{to_pg_timestamp, v14::xlog_utils, WAL_SEGMENT_SIZE}; use utils::{ completion, generation::Generation, @@ -196,9 +196,8 @@ fn drop_wlock(rlock: tokio::sync::RwLockWriteGuard<'_, T>) { /// The outward-facing resources required to build a Timeline pub struct TimelineResources { pub remote_client: RemoteTimelineClient, - pub timeline_get_throttle: Arc< - crate::tenant::throttle::Throttle<&'static crate::metrics::tenant_throttling::TimelineGet>, - >, + pub timeline_get_throttle: + Arc>, pub l0_flush_global_state: l0_flush::L0FlushGlobalState, } @@ -406,9 +405,8 @@ pub struct Timeline { gc_lock: tokio::sync::Mutex<()>, /// Cloned from [`super::Tenant::timeline_get_throttle`] on construction. - timeline_get_throttle: Arc< - crate::tenant::throttle::Throttle<&'static crate::metrics::tenant_throttling::TimelineGet>, - >, + timeline_get_throttle: + Arc>, /// Keep aux directory cache to avoid it's reconstruction on each update pub(crate) aux_files: tokio::sync::Mutex, @@ -1339,6 +1337,10 @@ impl Timeline { _ctx: &RequestContext, ) -> anyhow::Result { let lease = { + // Normalize the requested LSN to be aligned, and move to the first record + // if it points to the beginning of the page (header). 
+ let lsn = xlog_utils::normalize_lsn(lsn, WAL_SEGMENT_SIZE); + let mut gc_info = self.gc_info.write().unwrap(); let valid_until = SystemTime::now() + length; @@ -3599,7 +3601,7 @@ impl Timeline { ctx, ) .await - .map_err(|e| FlushLayerError::from_anyhow(self, e))?; + .map_err(|e| FlushLayerError::from_anyhow(self, e.into()))?; if self.cancel.is_cancelled() { return Err(FlushLayerError::Cancelled); @@ -3838,16 +3840,20 @@ impl Timeline { partition_size: u64, flags: EnumSet, ctx: &RequestContext, - ) -> anyhow::Result<((KeyPartitioning, SparseKeyPartitioning), Lsn)> { + ) -> Result<((KeyPartitioning, SparseKeyPartitioning), Lsn), CompactionError> { let Ok(mut partitioning_guard) = self.partitioning.try_lock() else { // NB: there are two callers, one is the compaction task, of which there is only one per struct Tenant and hence Timeline. // The other is the initdb optimization in flush_frozen_layer, used by `boostrap_timeline`, which runs before `.activate()` // and hence before the compaction task starts. - anyhow::bail!("repartition() called concurrently, this should not happen"); + return Err(CompactionError::Other(anyhow!( + "repartition() called concurrently, this should not happen" + ))); }; let ((dense_partition, sparse_partition), partition_lsn) = &*partitioning_guard; if lsn < *partition_lsn { - anyhow::bail!("repartition() called with LSN going backwards, this should not happen"); + return Err(CompactionError::Other(anyhow!( + "repartition() called with LSN going backwards, this should not happen" + ))); } let distance = lsn.0 - partition_lsn.0; @@ -4015,6 +4021,7 @@ impl Timeline { // partition, so flush it to disk. 
let (desc, path) = image_layer_writer.finish(ctx).await?; let image_layer = Layer::finish_creating(self.conf, self, desc, &path)?; + info!("created image layer for rel {}", image_layer.local_path()); Ok(ImageLayerCreationOutcome { image: Some(image_layer), next_start_key: img_range.end, @@ -4104,6 +4111,10 @@ impl Timeline { // partition, so flush it to disk. let (desc, path) = image_layer_writer.finish(ctx).await?; let image_layer = Layer::finish_creating(self.conf, self, desc, &path)?; + info!( + "created image layer for metadata {}", + image_layer.local_path() + ); Ok(ImageLayerCreationOutcome { image: Some(image_layer), next_start_key: img_range.end, @@ -4311,7 +4322,9 @@ impl Timeline { timer.stop_and_record(); // Creating image layers may have caused some previously visible layers to be covered - self.update_layer_visibility().await?; + if !image_layers.is_empty() { + self.update_layer_visibility().await?; + } Ok(image_layers) } @@ -4442,6 +4455,12 @@ pub(crate) enum CompactionError { Other(anyhow::Error), } +impl CompactionError { + pub fn is_cancelled(&self) -> bool { + matches!(self, CompactionError::ShuttingDown) + } +} + impl From for CompactionError { fn from(err: CollectKeySpaceError) -> Self { match err { @@ -5373,7 +5392,8 @@ impl Timeline { /// Force create an image layer and place it into the layer map. /// /// DO NOT use this function directly. Use [`Tenant::branch_timeline_test_with_layers`] - /// or [`Tenant::create_test_timeline_with_layers`] to ensure all these layers are placed into the layer map in one run. + /// or [`Tenant::create_test_timeline_with_layers`] to ensure all these layers are + /// placed into the layer map in one run AND be validated. 
#[cfg(test)] pub(super) async fn force_create_image_layer( self: &Arc, @@ -5407,7 +5427,7 @@ impl Timeline { } let (desc, path) = image_layer_writer.finish(ctx).await?; let image_layer = Layer::finish_creating(self.conf, self, desc, &path)?; - + info!("force created image layer {}", image_layer.local_path()); { let mut guard = self.layers.write().await; guard.open_mut().unwrap().force_insert_layer(image_layer); @@ -5419,7 +5439,8 @@ impl Timeline { /// Force create a delta layer and place it into the layer map. /// /// DO NOT use this function directly. Use [`Tenant::branch_timeline_test_with_layers`] - /// or [`Tenant::create_test_timeline_with_layers`] to ensure all these layers are placed into the layer map in one run. + /// or [`Tenant::create_test_timeline_with_layers`] to ensure all these layers are + /// placed into the layer map in one run AND be validated. #[cfg(test)] pub(super) async fn force_create_delta_layer( self: &Arc, @@ -5445,33 +5466,6 @@ impl Timeline { if let Some(check_start_lsn) = check_start_lsn { assert!(deltas.lsn_range.start >= check_start_lsn); } - // check if the delta layer does not violate the LSN invariant, the legacy compaction should always produce a batch of - // layers of the same start/end LSN, and so should the force inserted layer - { - /// Checks if a overlaps with b, assume a/b = [start, end). 
- pub fn overlaps_with(a: &Range, b: &Range) -> bool { - !(a.end <= b.start || b.end <= a.start) - } - - if deltas.key_range.start.next() != deltas.key_range.end { - let guard = self.layers.read().await; - let mut invalid_layers = - guard.layer_map()?.iter_historic_layers().filter(|layer| { - layer.is_delta() - && overlaps_with(&layer.lsn_range, &deltas.lsn_range) - && layer.lsn_range != deltas.lsn_range - // skip single-key layer files - && layer.key_range.start.next() != layer.key_range.end - }); - if let Some(layer) = invalid_layers.next() { - // If a delta layer overlaps with another delta layer AND their LSN range is not the same, panic - panic!( - "inserted layer violates delta layer LSN invariant: current_lsn_range={}..{}, conflict_lsn_range={}..{}", - deltas.lsn_range.start, deltas.lsn_range.end, layer.lsn_range.start, layer.lsn_range.end - ); - } - } - } let mut delta_layer_writer = DeltaLayerWriter::new( self.conf, self.timeline_id, @@ -5486,7 +5480,7 @@ impl Timeline { } let (desc, path) = delta_layer_writer.finish(deltas.key_range.end, ctx).await?; let delta_layer = Layer::finish_creating(self.conf, self, desc, &path)?; - + info!("force created delta layer {}", delta_layer.local_path()); { let mut guard = self.layers.write().await; guard.open_mut().unwrap().force_insert_layer(delta_layer); diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index d1f06e3480..3de386a2d5 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -29,7 +29,9 @@ use utils::id::TimelineId; use crate::context::{AccessStatsBehavior, RequestContext, RequestContextBuilder}; use crate::page_cache; +use crate::tenant::checks::check_valid_layermap; use crate::tenant::remote_timeline_client::WaitCompletionError; +use crate::tenant::storage_layer::filter_iterator::FilterIterator; use crate::tenant::storage_layer::merge_iterator::MergeIterator; use 
crate::tenant::storage_layer::split_writer::{ SplitDeltaLayerWriter, SplitImageLayerWriter, SplitWriterResult, @@ -388,7 +390,7 @@ impl Timeline { // error but continue. // // Suppress error when it's due to cancellation - if !self.cancel.is_cancelled() { + if !self.cancel.is_cancelled() && !err.is_cancelled() { tracing::error!("could not compact, repartitioning keyspace failed: {err:?}"); } (1, false) @@ -1771,6 +1773,7 @@ impl Timeline { gc_cutoff, lowest_retain_lsn ); + // Step 1: (In the future) construct a k-merge iterator over all layers. For now, simply collect all keys + LSNs. // Also, verify if the layer map can be split by drawing a horizontal line at every LSN start/end split point. let mut lsn_split_point = BTreeSet::new(); // TODO: use a better data structure (range tree / range set?) @@ -1788,20 +1791,12 @@ impl Timeline { stat.visit_image_layer(desc.file_size()); } } - for layer in &layer_selection { - let desc = layer.layer_desc(); - let key_range = &desc.key_range; - if desc.is_delta() && key_range.start.next() != key_range.end { - let lsn_range = desc.lsn_range.clone(); - let intersects = lsn_split_point.range(lsn_range).collect_vec(); - if intersects.len() > 1 { - bail!( - "cannot run gc-compaction because it violates the layer map LSN split assumption: layer {} intersects with LSN [{}]", - desc.key(), - intersects.into_iter().map(|lsn| lsn.to_string()).join(", ") - ); - } - } + let layer_names: Vec = layer_selection + .iter() + .map(|layer| layer.layer_desc().layer_name()) + .collect_vec(); + if let Some(err) = check_valid_layermap(&layer_names) { + bail!("cannot run gc-compaction because {}", err); } // The maximum LSN we are processing in this compaction loop let end_lsn = layer_selection @@ -1827,7 +1822,12 @@ impl Timeline { image_layers.push(layer); } } - let mut merge_iter = MergeIterator::create(&delta_layers, &image_layers, ctx); + let (dense_ks, sparse_ks) = self.collect_gc_compaction_keyspace().await?; + let mut merge_iter = 
FilterIterator::create( + MergeIterator::create(&delta_layers, &image_layers, ctx), + dense_ks, + sparse_ks, + )?; // Step 2: Produce images+deltas. TODO: ensure newly-produced delta does not overlap with other deltas. // Data of the same key. let mut accumulated_values = Vec::new(); diff --git a/pageserver/src/tenant/timeline/delete.rs b/pageserver/src/tenant/timeline/delete.rs index dc4118bb4a..90db08ea81 100644 --- a/pageserver/src/tenant/timeline/delete.rs +++ b/pageserver/src/tenant/timeline/delete.rs @@ -135,25 +135,6 @@ async fn delete_remote_layers_and_index(timeline: &Timeline) -> anyhow::Result<( .context("delete_all") } -// This function removs remaining traces of a timeline on disk. -// Namely: metadata file, timeline directory, delete mark. -// Note: io::ErrorKind::NotFound are ignored for metadata and timeline dir. -// delete mark should be present because it is the last step during deletion. -// (nothing can fail after its deletion) -async fn cleanup_remaining_timeline_fs_traces( - conf: &PageServerConf, - tenant_shard_id: TenantShardId, - timeline_id: TimelineId, -) -> anyhow::Result<()> { - // Remove delete mark - // TODO: once we are confident that no more exist in the field, remove this - // line. It cleans up a legacy marker file that might in rare cases be present. - tokio::fs::remove_file(conf.timeline_delete_mark_file_path(tenant_shard_id, timeline_id)) - .await - .or_else(fs_ext::ignore_not_found) - .context("remove delete mark") -} - /// It is important that this gets called when DeletionGuard is being held. /// For more context see comments in [`DeleteTimelineFlow::prepare`] async fn remove_timeline_from_tenant( @@ -194,12 +175,10 @@ async fn remove_timeline_from_tenant( /// 7. Delete mark file /// /// It is resumable from any step in case a crash/restart occurs. -/// There are three entrypoints to the process: +/// There are two entrypoints to the process: /// 1. 
[`DeleteTimelineFlow::run`] this is the main one called by a management api handler. /// 2. [`DeleteTimelineFlow::resume_deletion`] is called during restarts when local metadata is still present /// and we possibly neeed to continue deletion of remote files. -/// 3. [`DeleteTimelineFlow::cleanup_remaining_timeline_fs_traces`] is used when we deleted remote -/// index but still have local metadata, timeline directory and delete mark. /// /// Note the only other place that messes around timeline delete mark is the logic that scans directory with timelines during tenant load. #[derive(Default)] @@ -311,18 +290,6 @@ impl DeleteTimelineFlow { Ok(()) } - #[instrument(skip_all, fields(%timeline_id))] - pub async fn cleanup_remaining_timeline_fs_traces( - tenant: &Tenant, - timeline_id: TimelineId, - ) -> anyhow::Result<()> { - let r = - cleanup_remaining_timeline_fs_traces(tenant.conf, tenant.tenant_shard_id, timeline_id) - .await; - info!("Done"); - r - } - fn prepare( tenant: &Tenant, timeline_id: TimelineId, diff --git a/pageserver/src/tenant/timeline/eviction_task.rs b/pageserver/src/tenant/timeline/eviction_task.rs index 2f6cb4d73a..26c2861b93 100644 --- a/pageserver/src/tenant/timeline/eviction_task.rs +++ b/pageserver/src/tenant/timeline/eviction_task.rs @@ -30,8 +30,8 @@ use crate::{ pgdatadir_mapping::CollectKeySpaceError, task_mgr::{self, TaskKind, BACKGROUND_RUNTIME}, tenant::{ - storage_layer::LayerVisibilityHint, tasks::BackgroundLoopKind, timeline::EvictionError, - LogicalSizeCalculationCause, Tenant, + size::CalculateSyntheticSizeError, storage_layer::LayerVisibilityHint, + tasks::BackgroundLoopKind, timeline::EvictionError, LogicalSizeCalculationCause, Tenant, }, }; @@ -557,6 +557,8 @@ impl Timeline { gather_result = gather => { match gather_result { Ok(_) => {}, + // It can happen sometimes that we hit this instead of the cancellation token firing above + Err(CalculateSyntheticSizeError::Cancelled) => {} Err(e) => { // We don't care about the result, but, 
if it failed, we should log it, // since consumption metric might be hitting the cached value and diff --git a/pageserver/src/tenant/vectored_blob_io.rs b/pageserver/src/tenant/vectored_blob_io.rs index 553edf6d8b..aa37a45898 100644 --- a/pageserver/src/tenant/vectored_blob_io.rs +++ b/pageserver/src/tenant/vectored_blob_io.rs @@ -16,8 +16,9 @@ //! Note that the vectored blob api does *not* go through the page cache. use std::collections::BTreeMap; +use std::ops::Deref; -use bytes::BytesMut; +use bytes::{Bytes, BytesMut}; use pageserver_api::key::Key; use tokio::io::AsyncWriteExt; use tokio_epoll_uring::BoundedBuf; @@ -35,11 +36,123 @@ pub struct BlobMeta { pub lsn: Lsn, } -/// Blob offsets into [`VectoredBlobsBuf::buf`] +/// A view into the vectored blobs read buffer. +#[derive(Clone, Debug)] +pub(crate) enum BufView<'a> { + Slice(&'a [u8]), + Bytes(bytes::Bytes), +} + +impl<'a> BufView<'a> { + /// Creates a new slice-based view on the blob. + pub fn new_slice(slice: &'a [u8]) -> Self { + Self::Slice(slice) + } + + /// Creates a new [`bytes::Bytes`]-based view on the blob. + pub fn new_bytes(bytes: bytes::Bytes) -> Self { + Self::Bytes(bytes) + } + + /// Convert the view into `Bytes`. + /// + /// If using slice as the underlying storage, the copy will be an O(n) operation. + pub fn into_bytes(self) -> Bytes { + match self { + BufView::Slice(slice) => Bytes::copy_from_slice(slice), + BufView::Bytes(bytes) => bytes, + } + } + + /// Creates a sub-view of the blob based on the range. 
+ fn view(&self, range: std::ops::Range) -> Self { + match self { + BufView::Slice(slice) => BufView::Slice(&slice[range]), + BufView::Bytes(bytes) => BufView::Bytes(bytes.slice(range)), + } + } +} + +impl<'a> Deref for BufView<'a> { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + match self { + BufView::Slice(slice) => slice, + BufView::Bytes(bytes) => bytes, + } + } +} + +impl<'a> AsRef<[u8]> for BufView<'a> { + fn as_ref(&self) -> &[u8] { + match self { + BufView::Slice(slice) => slice, + BufView::Bytes(bytes) => bytes.as_ref(), + } + } +} + +impl<'a> From<&'a [u8]> for BufView<'a> { + fn from(value: &'a [u8]) -> Self { + Self::new_slice(value) + } +} + +impl From for BufView<'_> { + fn from(value: Bytes) -> Self { + Self::new_bytes(value) + } +} + +/// Blob offsets into [`VectoredBlobsBuf::buf`]. The byte range is potentially compressed, +/// subject to [`VectoredBlob::compression_bits`]. pub struct VectoredBlob { - pub start: usize, - pub end: usize, + /// Blob metadata. pub meta: BlobMeta, + /// Start offset. + start: usize, + /// End offset. + end: usize, + /// Compression used on the blob. + compression_bits: u8, +} + +impl VectoredBlob { + /// Reads a decompressed view of the blob. 
+ pub(crate) async fn read<'a>(&self, buf: &BufView<'a>) -> Result, std::io::Error> { + let view = buf.view(self.start..self.end); + + match self.compression_bits { + BYTE_UNCOMPRESSED => Ok(view), + BYTE_ZSTD => { + let mut decompressed_vec = Vec::new(); + let mut decoder = + async_compression::tokio::write::ZstdDecoder::new(&mut decompressed_vec); + decoder.write_all(&view).await?; + decoder.flush().await?; + // Zero-copy conversion from `Vec` to `Bytes` + Ok(BufView::new_bytes(Bytes::from(decompressed_vec))) + } + bits => { + let error = std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Failed to decompress blob for {}@{}, {}..{}: invalid compression byte {bits:x}", self.meta.key, self.meta.lsn, self.start, self.end), + ); + Err(error) + } + } + } +} + +impl std::fmt::Display for VectoredBlob { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}@{}, {}..{}", + self.meta.key, self.meta.lsn, self.start, self.end + ) + } } /// Return type of [`VectoredBlobReader::read_blobs`] @@ -514,7 +627,7 @@ impl<'a> VectoredBlobReader<'a> { ); } - let mut buf = self + let buf = self .file .read_exact_at(buf.slice(0..read.size()), read.start, ctx) .await? @@ -529,9 +642,6 @@ impl<'a> VectoredBlobReader<'a> { // of a blob is implicit: the start of the next blob if one exists // or the end of the read. 
- // Some scratch space, put here for reusing the allocation - let mut decompressed_vec = Vec::new(); - for (blob_start, meta) in blobs_at { let blob_start_in_buf = blob_start - start_offset; let first_len_byte = buf[blob_start_in_buf as usize]; @@ -557,35 +667,14 @@ impl<'a> VectoredBlobReader<'a> { ) }; - let start_raw = blob_start_in_buf + size_length; - let end_raw = start_raw + blob_size; - let (start, end); - if compression_bits == BYTE_UNCOMPRESSED { - start = start_raw as usize; - end = end_raw as usize; - } else if compression_bits == BYTE_ZSTD { - let mut decoder = - async_compression::tokio::write::ZstdDecoder::new(&mut decompressed_vec); - decoder - .write_all(&buf[start_raw as usize..end_raw as usize]) - .await?; - decoder.flush().await?; - start = buf.len(); - buf.extend_from_slice(&decompressed_vec); - end = buf.len(); - decompressed_vec.clear(); - } else { - let error = std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!("invalid compression byte {compression_bits:x}"), - ); - return Err(error); - } + let start = (blob_start_in_buf + size_length) as usize; + let end = start + blob_size as usize; metas.push(VectoredBlob { start, end, meta: *meta, + compression_bits, }); } @@ -1020,8 +1109,13 @@ mod tests { let result = vectored_blob_reader.read_blobs(&read, buf, &ctx).await?; assert_eq!(result.blobs.len(), 1); let read_blob = &result.blobs[0]; - let read_buf = &result.buf[read_blob.start..read_blob.end]; - assert_eq!(blob, read_buf, "mismatch for idx={idx} at offset={offset}"); + let view = BufView::new_slice(&result.buf); + let read_buf = read_blob.read(&view).await?; + assert_eq!( + &blob[..], + &read_buf[..], + "mismatch for idx={idx} at offset={offset}" + ); buf = result.buf; } Ok(()) diff --git a/pageserver/src/walredo.rs b/pageserver/src/walredo.rs index 0fe7def8b0..a1c9fc5651 100644 --- a/pageserver/src/walredo.rs +++ b/pageserver/src/walredo.rs @@ -205,6 +205,22 @@ impl PostgresRedoManager { } } + /// Do a ping request-response 
roundtrip. + /// + /// Not used in production, but by Rust benchmarks. + /// + /// # Cancel-Safety + /// + /// This method is cancellation-safe. + pub async fn ping(&self, pg_version: u32) -> Result<(), Error> { + self.do_with_walredo_process(pg_version, |proc| async move { + proc.ping(Duration::from_secs(1)) + .await + .map_err(Error::Other) + }) + .await + } + pub fn status(&self) -> WalRedoManagerStatus { WalRedoManagerStatus { last_redo_at: { @@ -297,6 +313,9 @@ impl PostgresRedoManager { } } + /// # Cancel-Safety + /// + /// This method is cancel-safe iff `closure` is cancel-safe. async fn do_with_walredo_process< F: FnOnce(Arc) -> Fut, Fut: Future>, @@ -537,6 +556,17 @@ mod tests { use tracing::Instrument; use utils::{id::TenantId, lsn::Lsn}; + #[tokio::test] + async fn test_ping() { + let h = RedoHarness::new().unwrap(); + + h.manager + .ping(14) + .instrument(h.span()) + .await + .expect("ping should work"); + } + #[tokio::test] async fn short_v14_redo() { let expected = std::fs::read("test_data/short_v14_redo.page").unwrap(); diff --git a/pageserver/src/walredo/process.rs b/pageserver/src/walredo/process.rs index 9140d4f6aa..f3197e68b5 100644 --- a/pageserver/src/walredo/process.rs +++ b/pageserver/src/walredo/process.rs @@ -6,6 +6,7 @@ use self::no_leak_child::NoLeakChild; use crate::{ config::PageServerConf, metrics::{WalRedoKillCause, WAL_REDO_PROCESS_COUNTERS, WAL_REDO_RECORD_COUNTER}, + page_cache::PAGE_SZ, span::debug_assert_current_span_has_tenant_id, walrecord::NeonWalRecord, }; @@ -237,6 +238,26 @@ impl WalRedoProcess { res } + /// Do a ping request-response roundtrip. + /// + /// Not used in production, but by Rust benchmarks. 
+ pub(crate) async fn ping(&self, timeout: Duration) -> anyhow::Result<()> { + let mut writebuf: Vec = Vec::with_capacity(4); + protocol::build_ping_msg(&mut writebuf); + let Ok(res) = tokio::time::timeout(timeout, self.apply_wal_records0(&writebuf)).await + else { + anyhow::bail!("WAL redo ping timed out"); + }; + let response = res?; + if response.len() != PAGE_SZ { + anyhow::bail!( + "WAL redo ping response should respond with page-sized response: {}", + response.len() + ); + } + Ok(()) + } + /// # Cancel-Safety /// /// When not polled to completion (e.g. because in `tokio::select!` another diff --git a/pageserver/src/walredo/process/protocol.rs b/pageserver/src/walredo/process/protocol.rs index b703344cc8..de3ca8741b 100644 --- a/pageserver/src/walredo/process/protocol.rs +++ b/pageserver/src/walredo/process/protocol.rs @@ -55,3 +55,8 @@ pub(crate) fn build_get_page_msg(tag: BufferTag, buf: &mut Vec) { tag.ser_into(buf) .expect("serialize BufferTag should always succeed"); } + +pub(crate) fn build_ping_msg(buf: &mut Vec) { + buf.put_u8(b'H'); + buf.put_u32(4); +} diff --git a/pgxn/neon/Makefile b/pgxn/neon/Makefile index 3b755bb042..f1229b2d73 100644 --- a/pgxn/neon/Makefile +++ b/pgxn/neon/Makefile @@ -9,6 +9,8 @@ OBJS = \ hll.o \ libpagestore.o \ neon.o \ + neon_pgversioncompat.o \ + neon_perf_counters.o \ neon_utils.o \ neon_walreader.o \ pagestore_smgr.o \ @@ -23,7 +25,18 @@ SHLIB_LINK_INTERNAL = $(libpq) SHLIB_LINK = -lcurl EXTENSION = neon -DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql neon--1.3--1.2.sql neon--1.2--1.1.sql neon--1.1--1.0.sql neon--1.3--1.4.sql neon--1.4--1.3.sql +DATA = \ + neon--1.0.sql \ + neon--1.0--1.1.sql \ + neon--1.1--1.2.sql \ + neon--1.2--1.3.sql \ + neon--1.3--1.4.sql \ + neon--1.4--1.5.sql \ + neon--1.5--1.4.sql \ + neon--1.4--1.3.sql \ + neon--1.3--1.2.sql \ + neon--1.2--1.1.sql \ + neon--1.1--1.0.sql PGFILEDESC = "neon - cloud storage for PostgreSQL" EXTRA_CLEAN = \ diff --git 
a/pgxn/neon/file_cache.c b/pgxn/neon/file_cache.c index ab6739465b..2b461c8641 100644 --- a/pgxn/neon/file_cache.c +++ b/pgxn/neon/file_cache.c @@ -109,6 +109,7 @@ typedef struct FileCacheControl * reenabling */ uint32 size; /* size of cache file in chunks */ uint32 used; /* number of used chunks */ + uint32 used_pages; /* number of used pages */ uint32 limit; /* shared copy of lfc_size_limit */ uint64 hits; uint64 misses; @@ -905,6 +906,10 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, /* Cache overflow: evict least recently used chunk */ FileCacheEntry *victim = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->lru)); + for (int i = 0; i < BLOCKS_PER_CHUNK; i++) + { + lfc_ctl->used_pages -= (victim->bitmap[i >> 5] >> (i & 31)) & 1; + } CriticalAssert(victim->access_count == 0); entry->offset = victim->offset; /* grab victim's chunk */ hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL); @@ -959,6 +964,7 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, for (int i = 0; i < blocks_in_chunk; i++) { + lfc_ctl->used_pages += 1 - ((entry->bitmap[(chunk_offs + i) >> 5] >> ((chunk_offs + i) & 31)) & 1); entry->bitmap[(chunk_offs + i) >> 5] |= (1 << ((chunk_offs + i) & 31)); } @@ -1051,6 +1057,11 @@ neon_get_lfc_stats(PG_FUNCTION_ARGS) if (lfc_ctl) value = lfc_ctl->size; break; + case 5: + key = "file_cache_used_pages"; + if (lfc_ctl) + value = lfc_ctl->used_pages; + break; default: SRF_RETURN_DONE(funcctx); } diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c index df7000acc0..07a19a7114 100644 --- a/pgxn/neon/libpagestore.c +++ b/pgxn/neon/libpagestore.c @@ -30,6 +30,7 @@ #include "utils/guc.h" #include "neon.h" +#include "neon_perf_counters.h" #include "neon_utils.h" #include "pagestore_client.h" #include "walproposer.h" @@ -331,6 +332,7 @@ CLEANUP_AND_DISCONNECT(PageServer *shard) } if (shard->conn) { + MyNeonCounters->pageserver_disconnects_total++; 
PQfinish(shard->conn); shard->conn = NULL; } @@ -737,6 +739,8 @@ pageserver_send(shardno_t shard_no, NeonRequest *request) PageServer *shard = &page_servers[shard_no]; PGconn *pageserver_conn; + MyNeonCounters->pageserver_requests_sent_total++; + /* If the connection was lost for some reason, reconnect */ if (shard->state == PS_Connected && PQstatus(shard->conn) == CONNECTION_BAD) { @@ -889,6 +893,7 @@ pageserver_flush(shardno_t shard_no) } else { + MyNeonCounters->pageserver_send_flushes_total++; if (PQflush(pageserver_conn)) { char *msg = pchomp(PQerrorMessage(pageserver_conn)); @@ -922,7 +927,7 @@ check_neon_id(char **newval, void **extra, GucSource source) static Size PagestoreShmemSize(void) { - return sizeof(PagestoreShmemState); + return add_size(sizeof(PagestoreShmemState), NeonPerfCountersShmemSize()); } static bool @@ -941,6 +946,9 @@ PagestoreShmemInit(void) memset(&pagestore_shared->shard_map, 0, sizeof(ShardMap)); AssignPageserverConnstring(page_server_connstring, NULL); } + + NeonPerfCountersShmemInit(); + LWLockRelease(AddinShmemInitLock); return found; } diff --git a/pgxn/neon/neon--1.4--1.5.sql b/pgxn/neon/neon--1.4--1.5.sql new file mode 100644 index 0000000000..a1db7bf1b1 --- /dev/null +++ b/pgxn/neon/neon--1.4--1.5.sql @@ -0,0 +1,39 @@ +\echo Use "ALTER EXTENSION neon UPDATE TO '1.5'" to load this file. \quit + + +CREATE FUNCTION get_backend_perf_counters() +RETURNS SETOF RECORD +AS 'MODULE_PATHNAME', 'neon_get_backend_perf_counters' +LANGUAGE C PARALLEL SAFE; + +CREATE FUNCTION get_perf_counters() +RETURNS SETOF RECORD +AS 'MODULE_PATHNAME', 'neon_get_perf_counters' +LANGUAGE C PARALLEL SAFE; + +-- Show various metrics, for each backend. Note that the values are not reset +-- when a backend exits. When a new backend starts with the backend ID, it will +-- continue accumulating the values from where the old backend left. 
If you are +-- only interested in the changes from your own session, store the values at the +-- beginning of the session somewhere, and subtract them on subsequent calls. +-- +-- For histograms, 'bucket_le' is the upper bound of the histogram bucket. +CREATE VIEW neon_backend_perf_counters AS + SELECT P.procno, P.pid, P.metric, P.bucket_le, P.value + FROM get_backend_perf_counters() AS P ( + procno integer, + pid integer, + metric text, + bucket_le float8, + value float8 + ); + +-- Summary across all backends. (This could also be implemented with +-- an aggregate query over neon_backend_perf_counters view.) +CREATE VIEW neon_perf_counters AS + SELECT P.metric, P.bucket_le, P.value + FROM get_perf_counters() AS P ( + metric text, + bucket_le float8, + value float8 + ); diff --git a/pgxn/neon/neon--1.5--1.4.sql b/pgxn/neon/neon--1.5--1.4.sql new file mode 100644 index 0000000000..7939fd8aa9 --- /dev/null +++ b/pgxn/neon/neon--1.5--1.4.sql @@ -0,0 +1,4 @@ +DROP VIEW IF EXISTS neon_perf_counters; +DROP VIEW IF EXISTS neon_backend_perf_counters; +DROP FUNCTION IF EXISTS get_perf_counters(); +DROP FUNCTION IF EXISTS get_backend_perf_counters(); diff --git a/pgxn/neon/neon.control b/pgxn/neon/neon.control index 03bdb9a0b4..0b36bdbb65 100644 --- a/pgxn/neon/neon.control +++ b/pgxn/neon/neon.control @@ -1,5 +1,7 @@ # neon extension comment = 'cloud storage for PostgreSQL' +# TODO: bump default version to 1.5, after we are certain that we don't +# need to rollback the compute image default_version = '1.4' module_pathname = '$libdir/neon' relocatable = true diff --git a/pgxn/neon/neon_perf_counters.c b/pgxn/neon/neon_perf_counters.c new file mode 100644 index 0000000000..de653826c0 --- /dev/null +++ b/pgxn/neon/neon_perf_counters.c @@ -0,0 +1,261 @@ +/*------------------------------------------------------------------------- + * + * neon_perf_counters.c + * Collect statistics about Neon I/O + * + * Each backend has its own set of counters in shared memory. 
+ * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "funcapi.h" +#include "miscadmin.h" +#include "storage/proc.h" +#include "storage/shmem.h" +#include "utils/builtins.h" + +#include "neon_perf_counters.h" +#include "neon_pgversioncompat.h" + +neon_per_backend_counters *neon_per_backend_counters_shared; + +Size +NeonPerfCountersShmemSize(void) +{ + Size size = 0; + + size = add_size(size, mul_size(MaxBackends, sizeof(neon_per_backend_counters))); + + return size; +} + +void +NeonPerfCountersShmemInit(void) +{ + bool found; + + neon_per_backend_counters_shared = + ShmemInitStruct("Neon perf counters", + mul_size(MaxBackends, + sizeof(neon_per_backend_counters)), + &found); + Assert(found == IsUnderPostmaster); + if (!found) + { + /* shared memory is initialized to zeros, so nothing to do here */ + } +} + +/* + * Count a GetPage wait operation. + */ +void +inc_getpage_wait(uint64 latency_us) +{ + int lo = 0; + int hi = NUM_GETPAGE_WAIT_BUCKETS - 1; + + /* Find the right bucket with binary search */ + while (lo < hi) + { + int mid = (lo + hi) / 2; + + if (latency_us < getpage_wait_bucket_thresholds[mid]) + hi = mid; + else + lo = mid + 1; + } + MyNeonCounters->getpage_wait_us_bucket[lo]++; + MyNeonCounters->getpage_wait_us_sum += latency_us; + MyNeonCounters->getpage_wait_us_count++; +} + +/* + * Support functions for the views, neon_backend_perf_counters and + * neon_perf_counters. 
+ */ + +typedef struct +{ + char *name; + bool is_bucket; + double bucket_le; + double value; +} metric_t; + +static metric_t * +neon_perf_counters_to_metrics(neon_per_backend_counters *counters) +{ +#define NUM_METRICS (2 + NUM_GETPAGE_WAIT_BUCKETS + 8) + metric_t *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t)); + uint64 bucket_accum; + int i = 0; + Datum getpage_wait_str; + + metrics[i].name = "getpage_wait_seconds_count"; + metrics[i].is_bucket = false; + metrics[i].value = (double) counters->getpage_wait_us_count; + i++; + metrics[i].name = "getpage_wait_seconds_sum"; + metrics[i].is_bucket = false; + metrics[i].value = ((double) counters->getpage_wait_us_sum) / 1000000.0; + i++; + + bucket_accum = 0; + for (int bucketno = 0; bucketno < NUM_GETPAGE_WAIT_BUCKETS; bucketno++) + { + uint64 threshold = getpage_wait_bucket_thresholds[bucketno]; + + bucket_accum += counters->getpage_wait_us_bucket[bucketno]; + + metrics[i].name = "getpage_wait_seconds_bucket"; + metrics[i].is_bucket = true; + metrics[i].bucket_le = (threshold == UINT64_MAX) ? 
INFINITY : ((double) threshold) / 1000000.0; + metrics[i].value = (double) bucket_accum; + i++; + } + metrics[i].name = "getpage_prefetch_requests_total"; + metrics[i].is_bucket = false; + metrics[i].value = (double) counters->getpage_prefetch_requests_total; + i++; + metrics[i].name = "getpage_sync_requests_total"; + metrics[i].is_bucket = false; + metrics[i].value = (double) counters->getpage_sync_requests_total; + i++; + metrics[i].name = "getpage_prefetch_misses_total"; + metrics[i].is_bucket = false; + metrics[i].value = (double) counters->getpage_prefetch_misses_total; + i++; + metrics[i].name = "getpage_prefetch_discards_total"; + metrics[i].is_bucket = false; + metrics[i].value = (double) counters->getpage_prefetch_discards_total; + i++; + metrics[i].name = "pageserver_requests_sent_total"; + metrics[i].is_bucket = false; + metrics[i].value = (double) counters->pageserver_requests_sent_total; + i++; + metrics[i].name = "pageserver_requests_disconnects_total"; + metrics[i].is_bucket = false; + metrics[i].value = (double) counters->pageserver_disconnects_total; + i++; + metrics[i].name = "pageserver_send_flushes_total"; + metrics[i].is_bucket = false; + metrics[i].value = (double) counters->pageserver_send_flushes_total; + i++; + metrics[i].name = "file_cache_hits_total"; + metrics[i].is_bucket = false; + metrics[i].value = (double) counters->file_cache_hits_total; + i++; + + Assert(i == NUM_METRICS); + + /* NULL entry marks end of array */ + metrics[i].name = NULL; + metrics[i].value = 0; + + return metrics; +} + +/* + * Write metric to three output Datums + */ +static void +metric_to_datums(metric_t *m, Datum *values, bool *nulls) +{ + values[0] = CStringGetTextDatum(m->name); + nulls[0] = false; + if (m->is_bucket) + { + values[1] = Float8GetDatum(m->bucket_le); + nulls[1] = false; + } + else + { + values[1] = (Datum) 0; + nulls[1] = true; + } + values[2] = Float8GetDatum(m->value); + nulls[2] = false; +} + 
+PG_FUNCTION_INFO_V1(neon_get_backend_perf_counters); +Datum +neon_get_backend_perf_counters(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + Datum values[5]; + bool nulls[5]; + + /* We put all the tuples into a tuplestore in one go. */ + InitMaterializedSRF(fcinfo, 0); + + for (int procno = 0; procno < MaxBackends; procno++) + { + PGPROC *proc = GetPGProcByNumber(procno); + int pid = proc->pid; + neon_per_backend_counters *counters = &neon_per_backend_counters_shared[procno]; + metric_t *metrics = neon_perf_counters_to_metrics(counters); + + values[0] = Int32GetDatum(procno); + nulls[0] = false; + values[1] = Int32GetDatum(pid); + nulls[1] = false; + + for (int i = 0; metrics[i].name != NULL; i++) + { + metric_to_datums(&metrics[i], &values[2], &nulls[2]); + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls); + } + + pfree(metrics); + } + + return (Datum) 0; +} + +PG_FUNCTION_INFO_V1(neon_get_perf_counters); +Datum +neon_get_perf_counters(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + Datum values[3]; + bool nulls[3]; + Datum getpage_wait_str; + neon_per_backend_counters totals = {0}; + metric_t *metrics; + + /* We put all the tuples into a tuplestore in one go. 
*/ + InitMaterializedSRF(fcinfo, 0); + + /* Aggregate the counters across all backends */ + for (int procno = 0; procno < MaxBackends; procno++) + { + neon_per_backend_counters *counters = &neon_per_backend_counters_shared[procno]; + + totals.getpage_wait_us_count += counters->getpage_wait_us_count; + totals.getpage_wait_us_sum += counters->getpage_wait_us_sum; + for (int bucketno = 0; bucketno < NUM_GETPAGE_WAIT_BUCKETS; bucketno++) + totals.getpage_wait_us_bucket[bucketno] += counters->getpage_wait_us_bucket[bucketno]; + totals.getpage_prefetch_requests_total += counters->getpage_prefetch_requests_total; + totals.getpage_sync_requests_total += counters->getpage_sync_requests_total; + totals.getpage_prefetch_misses_total += counters->getpage_prefetch_misses_total; + totals.getpage_prefetch_discards_total += counters->getpage_prefetch_discards_total; + totals.pageserver_requests_sent_total += counters->pageserver_requests_sent_total; + totals.pageserver_disconnects_total += counters->pageserver_disconnects_total; + totals.pageserver_send_flushes_total += counters->pageserver_send_flushes_total; + totals.file_cache_hits_total += counters->file_cache_hits_total; + } + + metrics = neon_perf_counters_to_metrics(&totals); + for (int i = 0; metrics[i].name != NULL; i++) + { + metric_to_datums(&metrics[i], &values[0], &nulls[0]); + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls); + } + pfree(metrics); + + return (Datum) 0; +} diff --git a/pgxn/neon/neon_perf_counters.h b/pgxn/neon/neon_perf_counters.h new file mode 100644 index 0000000000..02163ada55 --- /dev/null +++ b/pgxn/neon/neon_perf_counters.h @@ -0,0 +1,111 @@ +/*------------------------------------------------------------------------- + * + * neon_perf_counters.h + * Performance counters for neon storage requests + *------------------------------------------------------------------------- + */ + +#ifndef NEON_PERF_COUNTERS_H +#define NEON_PERF_COUNTERS_H + +#if PG_VERSION_NUM >= 170000 
+#include "storage/procnumber.h" +#else +#include "storage/backendid.h" +#include "storage/proc.h" +#endif + +static const uint64 getpage_wait_bucket_thresholds[] = { + 20, 30, 60, 100, /* 0 - 100 us */ + 200, 300, 600, 1000, /* 100 us - 1 ms */ + 2000, 3000, 6000, 10000, /* 1 ms - 10 ms */ + 20000, 30000, 60000, 100000, /* 10 ms - 100 ms */ + 200000, 300000, 600000, 1000000, /* 100 ms - 1 s */ + 2000000, 3000000, 6000000, 10000000, /* 1 s - 10 s */ + 20000000, 30000000, 60000000, 100000000, /* 10 s - 100 s */ + UINT64_MAX, +}; +#define NUM_GETPAGE_WAIT_BUCKETS (lengthof(getpage_wait_bucket_thresholds)) + +typedef struct +{ + /* + * Histogram for how long an smgrread() request needs to wait for response + * from pageserver. When prefetching is effective, these wait times can be + * lower than the network latency to the pageserver, even zero, if the + * page is already readily prefetched whenever we need to read a page. + * + * Note: we accumulate these in microseconds, because that's convenient in + * the backend, but the 'neon_backend_perf_counters' view will convert + * them to seconds, to make them more idiomatic as prometheus metrics. + */ + uint64 getpage_wait_us_count; + uint64 getpage_wait_us_sum; + uint64 getpage_wait_us_bucket[NUM_GETPAGE_WAIT_BUCKETS]; + + /* + * Total number of speculative prefetch GetPage requests and synchronous + * GetPage requests sent. + */ + uint64 getpage_prefetch_requests_total; + uint64 getpage_sync_requests_total; + + /* XXX: It's not clear to me when these misses happen. */ + uint64 getpage_prefetch_misses_total; + + /* + * Number of prefetched responses that were discarded because the + * prefetched page was not needed or because it was concurrently fetched / + * modified by another backend. + */ + uint64 getpage_prefetch_discards_total; + + /* + * Total number of requests sent to pageserver. (prefetch_requests_total + * and sync_requests_total count only GetPage requests, this counts all + * request types.)
+ */ + uint64 pageserver_requests_sent_total; + + /* + * Number of times the connection to the pageserver was lost and the + * backend had to reconnect. Note that this doesn't count the first + * connection in each backend, only reconnects. + */ + uint64 pageserver_disconnects_total; + + /* + * Number of network flushes to the pageserver. Synchronous requests are + * flushed immediately, but when prefetching requests are sent in batches, + * this can be smaller than pageserver_requests_sent_total. + */ + uint64 pageserver_send_flushes_total; + + /* + * Number of requests satisfied from the LFC. + * + * This is redundant with the server-wide file_cache_hits, but this gives + * per-backend granularity, and it's handy to have this in the same place + * as counters for requests that went to the pageserver. Maybe move all + * the LFC stats to this struct in the future? + */ + uint64 file_cache_hits_total; + +} neon_per_backend_counters; + +/* Pointer to the shared memory array of neon_per_backend_counters structs */ +extern neon_per_backend_counters *neon_per_backend_counters_shared; + +#if PG_VERSION_NUM >= 170000 +#define MyNeonCounters (&neon_per_backend_counters_shared[MyProcNumber]) +#else +#define MyNeonCounters (&neon_per_backend_counters_shared[MyProc->pgprocno]) +#endif + +extern void inc_getpage_wait(uint64 latency); + +extern Size NeonPerfCountersShmemSize(void); +extern void NeonPerfCountersShmemInit(void); + + +#endif /* NEON_PERF_COUNTERS_H */ diff --git a/pgxn/neon/neon_pgversioncompat.c b/pgxn/neon/neon_pgversioncompat.c new file mode 100644 index 0000000000..a0dbddde4b --- /dev/null +++ b/pgxn/neon/neon_pgversioncompat.c @@ -0,0 +1,44 @@ +/* + * Support functions for the compatibility macros in neon_pgversioncompat.h + */ +#include "postgres.h" + +#include "funcapi.h" +#include "miscadmin.h" +#include "utils/tuplestore.h" + +#include "neon_pgversioncompat.h" + +#if PG_MAJORVERSION_NUM < 15 +void +InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 
flags) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + Tuplestorestate *tupstore; + MemoryContext old_context, + per_query_ctx; + TupleDesc stored_tupdesc; + + /* check to see if caller supports returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + + /* + * Store the tuplestore and the tuple descriptor in ReturnSetInfo. This + * must be done in the per-query memory context. + */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + old_context = MemoryContextSwitchTo(per_query_ctx); + + if (get_call_result_type(fcinfo, NULL, &stored_tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + tupstore = tuplestore_begin_heap(false, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = stored_tupdesc; + MemoryContextSwitchTo(old_context); +} +#endif diff --git a/pgxn/neon/neon_pgversioncompat.h b/pgxn/neon/neon_pgversioncompat.h index 59b97d64fe..e4754ec7ea 100644 --- a/pgxn/neon/neon_pgversioncompat.h +++ b/pgxn/neon/neon_pgversioncompat.h @@ -6,6 +6,8 @@ #ifndef NEON_PGVERSIONCOMPAT_H #define NEON_PGVERSIONCOMPAT_H +#include "fmgr.h" + #if PG_MAJORVERSION_NUM < 17 #define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != InvalidBackendId) #else @@ -123,4 +125,8 @@ #define AmAutoVacuumWorkerProcess() (IsAutoVacuumWorkerProcess()) #endif +#if PG_MAJORVERSION_NUM < 15 +extern void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags); +#endif + #endif /* NEON_PGVERSIONCOMPAT_H */ diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c index 36538ea5e2..1c87f4405c 100644 --- a/pgxn/neon/pagestore_smgr.c +++ b/pgxn/neon/pagestore_smgr.c @@ -66,6 +66,7 @@ #include "storage/md.h" #include "storage/smgr.h" +#include "neon_perf_counters.h" #include "pagestore_client.h" #include 
"bitmap.h" @@ -289,7 +290,6 @@ static PrefetchState *MyPState; static bool compact_prefetch_buffers(void); static void consume_prefetch_responses(void); -static uint64 prefetch_register_buffer(BufferTag tag, neon_request_lsns *force_request_lsns); static bool prefetch_read(PrefetchRequest *slot); static void prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns); static bool prefetch_wait_for(uint64 ring_index); @@ -780,21 +780,27 @@ prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns } /* - * prefetch_register_buffer() - register and prefetch buffer + * prefetch_register_bufferv() - register and prefetch buffers * * Register that we may want the contents of BufferTag in the near future. + * This is used when issuing a speculative prefetch request, but also when + * performing a synchronous request and need the buffer right now. * * If force_request_lsns is not NULL, those values are sent to the * pageserver. If NULL, we utilize the lastWrittenLsn -infrastructure * to calculate the LSNs to send. * + * When performing a prefetch rather than a synchronous request, + * is_prefetch==true. Currently, it only affects how the request is accounted + * in the perf counters. + * * NOTE: this function may indirectly update MyPState->pfs_hash; which * invalidates any active pointers into the hash table. 
*/ - static uint64 prefetch_register_bufferv(BufferTag tag, neon_request_lsns *frlsns, - BlockNumber nblocks, const bits8 *mask) + BlockNumber nblocks, const bits8 *mask, + bool is_prefetch) { uint64 min_ring_index; PrefetchRequest req; @@ -815,6 +821,7 @@ Retry: PrfHashEntry *entry = NULL; uint64 ring_index; neon_request_lsns *lsns; + if (PointerIsValid(mask) && !BITMAP_ISSET(mask, i)) continue; @@ -858,6 +865,7 @@ Retry: prefetch_set_unused(ring_index); entry = NULL; slot = NULL; + MyNeonCounters->getpage_prefetch_discards_total++; } } @@ -972,6 +980,11 @@ Retry: min_ring_index = Min(min_ring_index, ring_index); + if (is_prefetch) + MyNeonCounters->getpage_prefetch_requests_total++; + else + MyNeonCounters->getpage_sync_requests_total++; + prefetch_do_request(slot, lsns); } @@ -1000,13 +1013,6 @@ Retry: } -static uint64 -prefetch_register_buffer(BufferTag tag, neon_request_lsns *force_request_lsns) -{ - return prefetch_register_bufferv(tag, force_request_lsns, 1, NULL); -} - - /* * Note: this function can get canceled and use a long jump to the next catch * context. Take care. @@ -2612,7 +2618,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, lfc_present[i] = ~(lfc_present[i]); ring_index = prefetch_register_bufferv(tag, NULL, iterblocks, - lfc_present); + lfc_present, true); nblocks -= iterblocks; blocknum += iterblocks; @@ -2656,7 +2662,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) CopyNRelFileInfoToBufTag(tag, InfoFromSMgrRel(reln)); - ring_index = prefetch_register_buffer(tag, NULL); + ring_index = prefetch_register_bufferv(tag, NULL, 1, NULL, true); Assert(ring_index < MyPState->ring_unused && MyPState->ring_last <= ring_index); @@ -2747,17 +2753,20 @@ neon_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber base_block * weren't for the behaviour of the LwLsn cache that uses the highest * value of the LwLsn cache when the entry is not found. 
*/ - prefetch_register_bufferv(buftag, request_lsns, nblocks, mask); + prefetch_register_bufferv(buftag, request_lsns, nblocks, mask, false); for (int i = 0; i < nblocks; i++) { void *buffer = buffers[i]; BlockNumber blockno = base_blockno + i; neon_request_lsns *reqlsns = &request_lsns[i]; + TimestampTz start_ts, end_ts; if (PointerIsValid(mask) && !BITMAP_ISSET(mask, i)) continue; + start_ts = GetCurrentTimestamp(); + if (RecoveryInProgress() && MyBackendType != B_STARTUP) XLogWaitForReplayOf(reqlsns[0].request_lsn); @@ -2794,6 +2803,7 @@ Retry: /* drop caches */ prefetch_set_unused(slot->my_ring_index); pgBufferUsage.prefetch.expired += 1; + MyNeonCounters->getpage_prefetch_discards_total++; /* make it look like a prefetch cache miss */ entry = NULL; } @@ -2804,8 +2814,9 @@ Retry: if (entry == NULL) { pgBufferUsage.prefetch.misses += 1; + MyNeonCounters->getpage_prefetch_misses_total++; - ring_index = prefetch_register_bufferv(buftag, reqlsns, 1, NULL); + ring_index = prefetch_register_bufferv(buftag, reqlsns, 1, NULL, false); Assert(ring_index != UINT64_MAX); slot = GetPrfSlot(ring_index); } @@ -2860,6 +2871,9 @@ Retry: /* buffer was used, clean up for later reuse */ prefetch_set_unused(ring_index); prefetch_cleanup_trailing_unused(); + + end_ts = GetCurrentTimestamp(); + inc_getpage_wait(end_ts >= start_ts ? 
(end_ts - start_ts) : 0); } } @@ -2913,6 +2927,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer /* Try to read from local file cache */ if (lfc_read(InfoFromSMgrRel(reln), forkNum, blkno, buffer)) { + MyNeonCounters->file_cache_hits_total++; return; } @@ -3097,7 +3112,7 @@ neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, /* assume heap */ RmgrTable[RM_HEAP_ID].rm_mask(mdbuf_masked, blkno); RmgrTable[RM_HEAP_ID].rm_mask(pageserver_masked, blkno); - + if (memcmp(mdbuf_masked, pageserver_masked, BLCKSZ) != 0) { neon_log(PANIC, "heap buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n", diff --git a/pgxn/neon_walredo/walredoproc.c b/pgxn/neon_walredo/walredoproc.c index 219ca85207..f98aa1cbe7 100644 --- a/pgxn/neon_walredo/walredoproc.c +++ b/pgxn/neon_walredo/walredoproc.c @@ -24,6 +24,7 @@ * PushPage ('P'): Copy a page image (in the payload) to buffer cache * ApplyRecord ('A'): Apply a WAL record (in the payload) * GetPage ('G'): Return a page image from buffer cache. + * Ping ('H'): Return the input message. * * Currently, you only get a response to GetPage requests; the response is * simply a 8k page, without any headers. Errors are logged to stderr. @@ -133,6 +134,7 @@ static void ApplyRecord(StringInfo input_message); static void apply_error_callback(void *arg); static bool redo_block_filter(XLogReaderState *record, uint8 block_id); static void GetPage(StringInfo input_message); +static void Ping(StringInfo input_message); static ssize_t buffered_read(void *buf, size_t count); static void CreateFakeSharedMemoryAndSemaphores(); @@ -394,6 +396,10 @@ WalRedoMain(int argc, char *argv[]) GetPage(&input_message); break; + case 'H': /* Ping */ + Ping(&input_message); + break; + /* * EOF means we're done. Perform normal shutdown. 
*/ @@ -1057,6 +1063,36 @@ GetPage(StringInfo input_message) } +static void +Ping(StringInfo input_message) +{ + int tot_written; + /* Response: the input message */ + tot_written = 0; + do { + ssize_t rc; + /* We don't need alignment, but it's bad practice to use char[BLCKSZ] */ +#if PG_VERSION_NUM >= 160000 + static const PGIOAlignedBlock response; +#else + static const PGAlignedBlock response; +#endif + rc = write(STDOUT_FILENO, &response.data[tot_written], BLCKSZ - tot_written); + if (rc < 0) { + /* If interrupted by signal, just retry */ + if (errno == EINTR) + continue; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to stdout: %m"))); + } + tot_written += rc; + } while (tot_written < BLCKSZ); + + elog(TRACE, "Page sent back for ping"); +} + + /* Buffer used by buffered_read() */ static char stdin_buf[16 * 1024]; static size_t stdin_len = 0; /* # of bytes in buffer */ diff --git a/proxy/Cargo.toml b/proxy/Cargo.toml index 6703eb06eb..501ce050e0 100644 --- a/proxy/Cargo.toml +++ b/proxy/Cargo.toml @@ -29,7 +29,6 @@ dashmap.workspace = true env_logger.workspace = true framed-websockets.workspace = true futures.workspace = true -git-version.workspace = true hashbrown.workspace = true hashlink.workspace = true hex.workspace = true diff --git a/proxy/src/auth/backend.rs b/proxy/src/auth/backend.rs index 5561c9c56d..4e9f4591ad 100644 --- a/proxy/src/auth/backend.rs +++ b/proxy/src/auth/backend.rs @@ -163,6 +163,7 @@ impl ComputeUserInfo { } pub(crate) enum ComputeCredentialKeys { + #[cfg(any(test, feature = "testing"))] Password(Vec), AuthKeys(AuthKeys), None, @@ -293,16 +294,10 @@ async fn auth_quirks( // We now expect to see a very specific payload in the place of password. 
let (info, unauthenticated_password) = match user_info.try_into() { Err(info) => { - let res = hacks::password_hack_no_authentication(ctx, info, client).await?; - - ctx.set_endpoint_id(res.info.endpoint.clone()); - let password = match res.keys { - ComputeCredentialKeys::Password(p) => p, - ComputeCredentialKeys::AuthKeys(_) | ComputeCredentialKeys::None => { - unreachable!("password hack should return a password") - } - }; - (res.info, Some(password)) + let (info, password) = + hacks::password_hack_no_authentication(ctx, info, client).await?; + ctx.set_endpoint_id(info.endpoint.clone()); + (info, Some(password)) } Ok(info) => (info, None), }; @@ -449,7 +444,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint, &()> { Self::Web(url, ()) => { info!("performing web authentication"); - let info = web::authenticate(ctx, &url, client).await?; + let info = web::authenticate(ctx, config, &url, client).await?; Backend::Web(url, info) } diff --git a/proxy/src/auth/backend/hacks.rs b/proxy/src/auth/backend/hacks.rs index e9019ce2cf..15123a2623 100644 --- a/proxy/src/auth/backend/hacks.rs +++ b/proxy/src/auth/backend/hacks.rs @@ -1,6 +1,4 @@ -use super::{ - ComputeCredentialKeys, ComputeCredentials, ComputeUserInfo, ComputeUserInfoNoEndpoint, -}; +use super::{ComputeCredentials, ComputeUserInfo, ComputeUserInfoNoEndpoint}; use crate::{ auth::{self, AuthFlow}, config::AuthenticationConfig, @@ -63,7 +61,7 @@ pub(crate) async fn password_hack_no_authentication( ctx: &RequestMonitoring, info: ComputeUserInfoNoEndpoint, client: &mut stream::PqStream>, -) -> auth::Result { +) -> auth::Result<(ComputeUserInfo, Vec)> { warn!("project not specified, resorting to the password hack auth flow"); ctx.set_auth_method(crate::context::AuthMethod::Cleartext); @@ -79,12 +77,12 @@ pub(crate) async fn password_hack_no_authentication( info!(project = &*payload.endpoint, "received missing parameter"); // Report tentative success; compute node will check the password anyway. 
- Ok(ComputeCredentials { - info: ComputeUserInfo { + Ok(( + ComputeUserInfo { user: info.user, options: info.options, endpoint: payload.endpoint, }, - keys: ComputeCredentialKeys::Password(payload.password), - }) + payload.password, + )) } diff --git a/proxy/src/auth/backend/jwt.rs b/proxy/src/auth/backend/jwt.rs index 1f44e4af5d..94e5999a5f 100644 --- a/proxy/src/auth/backend/jwt.rs +++ b/proxy/src/auth/backend/jwt.rs @@ -25,6 +25,8 @@ const MAX_JWK_BODY_SIZE: usize = 64 * 1024; pub(crate) trait FetchAuthRules: Clone + Send + Sync + 'static { fn fetch_auth_rules( &self, + ctx: &RequestMonitoring, + endpoint: EndpointId, role_name: RoleName, ) -> impl Future>> + Send; } @@ -101,7 +103,9 @@ impl JwkCacheEntryLock { async fn renew_jwks( &self, _permit: JwkRenewalPermit<'_>, + ctx: &RequestMonitoring, client: &reqwest::Client, + endpoint: EndpointId, role_name: RoleName, auth_rules: &F, ) -> anyhow::Result> { @@ -115,7 +119,9 @@ impl JwkCacheEntryLock { } } - let rules = auth_rules.fetch_auth_rules(role_name).await?; + let rules = auth_rules + .fetch_auth_rules(ctx, endpoint, role_name) + .await?; let mut key_sets = ahash::HashMap::with_capacity_and_hasher(rules.len(), ahash::RandomState::new()); // TODO(conrad): run concurrently @@ -166,6 +172,7 @@ impl JwkCacheEntryLock { self: &Arc, ctx: &RequestMonitoring, client: &reqwest::Client, + endpoint: EndpointId, role_name: RoleName, fetch: &F, ) -> Result, anyhow::Error> { @@ -176,7 +183,9 @@ impl JwkCacheEntryLock { let Some(cached) = guard else { let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Compute); let permit = self.acquire_permit().await; - return self.renew_jwks(permit, client, role_name, fetch).await; + return self + .renew_jwks(permit, ctx, client, endpoint, role_name, fetch) + .await; }; let last_update = now.duration_since(cached.last_retrieved); @@ -187,7 +196,9 @@ impl JwkCacheEntryLock { let permit = self.acquire_permit().await; // it's been too long since we checked the keys. 
wait for them to update. - return self.renew_jwks(permit, client, role_name, fetch).await; + return self + .renew_jwks(permit, ctx, client, endpoint, role_name, fetch) + .await; } // every 5 minutes we should spawn a job to eagerly update the token. @@ -198,8 +209,12 @@ impl JwkCacheEntryLock { let entry = self.clone(); let client = client.clone(); let fetch = fetch.clone(); + let ctx = ctx.clone(); tokio::spawn(async move { - if let Err(e) = entry.renew_jwks(permit, &client, role_name, &fetch).await { + if let Err(e) = entry + .renew_jwks(permit, &ctx, &client, endpoint, role_name, &fetch) + .await + { tracing::warn!(error=?e, "could not fetch JWKs in background job"); } }); @@ -216,6 +231,7 @@ impl JwkCacheEntryLock { ctx: &RequestMonitoring, jwt: &str, client: &reqwest::Client, + endpoint: EndpointId, role_name: RoleName, fetch: &F, ) -> Result<(), anyhow::Error> { @@ -242,7 +258,7 @@ impl JwkCacheEntryLock { let kid = header.key_id.context("missing key id")?; let mut guard = self - .get_or_update_jwk_cache(ctx, client, role_name.clone(), fetch) + .get_or_update_jwk_cache(ctx, client, endpoint.clone(), role_name.clone(), fetch) .await?; // get the key from the JWKs if possible. If not, wait for the keys to update. @@ -254,7 +270,14 @@ impl JwkCacheEntryLock { let permit = self.acquire_permit().await; guard = self - .renew_jwks(permit, client, role_name.clone(), fetch) + .renew_jwks( + permit, + ctx, + client, + endpoint.clone(), + role_name.clone(), + fetch, + ) .await?; } _ => { @@ -318,7 +341,7 @@ impl JwkCache { jwt: &str, ) -> Result<(), anyhow::Error> { // try with just a read lock first - let key = (endpoint, role_name.clone()); + let key = (endpoint.clone(), role_name.clone()); let entry = self.map.get(&key).as_deref().map(Arc::clone); let entry = entry.unwrap_or_else(|| { // acquire a write lock after to insert. 
@@ -327,7 +350,7 @@ impl JwkCache { }); entry - .check_jwt(ctx, jwt, &self.client, role_name, fetch) + .check_jwt(ctx, jwt, &self.client, endpoint, role_name, fetch) .await } } @@ -688,6 +711,8 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL impl FetchAuthRules for Fetch { async fn fetch_auth_rules( &self, + _ctx: &RequestMonitoring, + _endpoint: EndpointId, _role_name: RoleName, ) -> anyhow::Result> { Ok(vec![ @@ -706,6 +731,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL } let role_name = RoleName::from("user"); + let endpoint = EndpointId::from("ep"); let jwk_cache = Arc::new(JwkCacheEntryLock::default()); @@ -715,6 +741,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL &RequestMonitoring::test(), &token, &client, + endpoint.clone(), role_name.clone(), &Fetch(addr), ) diff --git a/proxy/src/auth/backend/local.rs b/proxy/src/auth/backend/local.rs index 8124f568cf..2ff2ca00f0 100644 --- a/proxy/src/auth/backend/local.rs +++ b/proxy/src/auth/backend/local.rs @@ -9,8 +9,9 @@ use crate::{ messages::{ColdStartInfo, EndpointJwksResponse, MetricsAuxInfo}, NodeInfo, }, + context::RequestMonitoring, intern::{BranchIdInt, BranchIdTag, EndpointIdTag, InternId, ProjectIdInt, ProjectIdTag}, - RoleName, + EndpointId, RoleName, }; use super::jwt::{AuthRule, FetchAuthRules, JwkCache}; @@ -57,7 +58,12 @@ pub struct JwksRoleSettings { } impl FetchAuthRules for StaticAuthRules { - async fn fetch_auth_rules(&self, role_name: RoleName) -> anyhow::Result> { + async fn fetch_auth_rules( + &self, + _ctx: &RequestMonitoring, + _endpoint: EndpointId, + role_name: RoleName, + ) -> anyhow::Result> { let mappings = JWKS_ROLE_MAP.load(); let role_mappings = mappings .as_deref() diff --git a/proxy/src/auth/backend/web.rs b/proxy/src/auth/backend/web.rs index 58a4bef62e..05f437355e 100644 --- a/proxy/src/auth/backend/web.rs +++ b/proxy/src/auth/backend/web.rs @@ -1,5 +1,6 @@ use crate::{ auth, compute, + config::AuthenticationConfig, 
console::{self, provider::NodeInfo}, context::RequestMonitoring, error::{ReportableError, UserFacingError}, @@ -58,6 +59,7 @@ pub(crate) fn new_psql_session_id() -> String { pub(super) async fn authenticate( ctx: &RequestMonitoring, + auth_config: &'static AuthenticationConfig, link_uri: &reqwest::Url, client: &mut PqStream, ) -> auth::Result { @@ -89,6 +91,14 @@ pub(super) async fn authenticate( info!(parent: &span, "waiting for console's reply..."); let db_info = waiter.await.map_err(WebAuthError::from)?; + if auth_config.ip_allowlist_check_enabled { + if let Some(allowed_ips) = &db_info.allowed_ips { + if !auth::check_peer_addr_is_in_list(&ctx.peer_addr(), allowed_ips) { + return Err(auth::AuthError::ip_address_not_allowed(ctx.peer_addr())); + } + } + } + client.write_message_noflush(&Be::NoticeResponse("Connecting to database."))?; // This config should be self-contained, because we won't diff --git a/proxy/src/console/messages.rs b/proxy/src/console/messages.rs index 9b66333cd4..85683acb82 100644 --- a/proxy/src/console/messages.rs +++ b/proxy/src/console/messages.rs @@ -284,6 +284,8 @@ pub(crate) struct DatabaseInfo { /// be inconvenient for debug with local PG instance. pub(crate) password: Option>, pub(crate) aux: MetricsAuxInfo, + #[serde(default)] + pub(crate) allowed_ips: Option>, } // Manually implement debug to omit sensitive info. 
@@ -294,6 +296,7 @@ impl fmt::Debug for DatabaseInfo { .field("port", &self.port) .field("dbname", &self.dbname) .field("user", &self.user) + .field("allowed_ips", &self.allowed_ips) .finish_non_exhaustive() } } @@ -432,6 +435,22 @@ mod tests { "aux": dummy_aux(), }))?; + // with allowed_ips + let dbinfo = serde_json::from_value::(json!({ + "host": "localhost", + "port": 5432, + "dbname": "postgres", + "user": "john_doe", + "password": "password", + "aux": dummy_aux(), + "allowed_ips": ["127.0.0.1"], + }))?; + + assert_eq!( + dbinfo.allowed_ips, + Some(vec![IpPattern::Single("127.0.0.1".parse()?)]) + ); + Ok(()) } diff --git a/proxy/src/console/provider.rs b/proxy/src/console/provider.rs index 12a6e2f12a..16e8da605b 100644 --- a/proxy/src/console/provider.rs +++ b/proxy/src/console/provider.rs @@ -303,6 +303,7 @@ impl NodeInfo { pub(crate) fn set_keys(&mut self, keys: &ComputeCredentialKeys) { match keys { + #[cfg(any(test, feature = "testing"))] ComputeCredentialKeys::Password(password) => self.config.password(password), ComputeCredentialKeys::AuthKeys(auth_keys) => self.config.auth_keys(*auth_keys), ComputeCredentialKeys::None => &mut self.config, diff --git a/proxy/src/context.rs b/proxy/src/context.rs index c013218ad9..021659e175 100644 --- a/proxy/src/context.rs +++ b/proxy/src/context.rs @@ -79,6 +79,40 @@ pub(crate) enum AuthMethod { Cleartext, } +impl Clone for RequestMonitoring { + fn clone(&self) -> Self { + let inner = self.0.try_lock().expect("should not deadlock"); + let new = RequestMonitoringInner { + peer_addr: inner.peer_addr, + session_id: inner.session_id, + protocol: inner.protocol, + first_packet: inner.first_packet, + region: inner.region, + span: info_span!("background_task"), + + project: inner.project, + branch: inner.branch, + endpoint_id: inner.endpoint_id.clone(), + dbname: inner.dbname.clone(), + user: inner.user.clone(), + application: inner.application.clone(), + error_kind: inner.error_kind, + auth_method: inner.auth_method.clone(), 
+ success: inner.success, + rejected: inner.rejected, + cold_start_info: inner.cold_start_info, + pg_options: inner.pg_options.clone(), + + sender: None, + disconnect_sender: None, + latency_timer: LatencyTimer::noop(inner.protocol), + disconnect_timestamp: inner.disconnect_timestamp, + }; + + Self(TryLock::new(new)) + } +} + impl RequestMonitoring { pub fn new( session_id: Uuid, diff --git a/proxy/src/metrics.rs b/proxy/src/metrics.rs index 2da7eac580..c2567e083a 100644 --- a/proxy/src/metrics.rs +++ b/proxy/src/metrics.rs @@ -397,6 +397,8 @@ pub struct LatencyTimer { protocol: Protocol, cold_start_info: ColdStartInfo, outcome: ConnectOutcome, + + skip_reporting: bool, } impl LatencyTimer { @@ -409,6 +411,20 @@ impl LatencyTimer { cold_start_info: ColdStartInfo::Unknown, // assume failed unless otherwise specified outcome: ConnectOutcome::Failed, + skip_reporting: false, + } + } + + pub(crate) fn noop(protocol: Protocol) -> Self { + Self { + start: time::Instant::now(), + stop: None, + accumulated: Accumulated::default(), + protocol, + cold_start_info: ColdStartInfo::Unknown, + // assume failed unless otherwise specified + outcome: ConnectOutcome::Failed, + skip_reporting: true, } } @@ -443,6 +459,10 @@ pub enum ConnectOutcome { impl Drop for LatencyTimer { fn drop(&mut self) { + if self.skip_reporting { + return; + } + let duration = self .stop .unwrap_or_else(time::Instant::now) diff --git a/proxy/src/serverless/backend.rs b/proxy/src/serverless/backend.rs index d163878528..aa236907db 100644 --- a/proxy/src/serverless/backend.rs +++ b/proxy/src/serverless/backend.rs @@ -27,7 +27,7 @@ use crate::{ Host, }; -use super::conn_pool::{poll_client, AuthData, Client, ConnInfo, GlobalConnPool}; +use super::conn_pool::{poll_client, Client, ConnInfo, GlobalConnPool}; pub(crate) struct PoolingBackend { pub(crate) pool: Arc>, @@ -274,13 +274,6 @@ impl ConnectMechanism for TokioMechanism { .dbname(&self.conn_info.dbname) .connect_timeout(timeout); - match &self.conn_info.auth 
{ - AuthData::Jwt(_) => {} - AuthData::Password(pw) => { - config.password(pw); - } - } - let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute); let res = config.connect(tokio_postgres::NoTls).await; drop(pause); diff --git a/proxy/src/serverless/conn_pool.rs b/proxy/src/serverless/conn_pool.rs index 6c32d5df0e..a850ecd2be 100644 --- a/proxy/src/serverless/conn_pool.rs +++ b/proxy/src/serverless/conn_pool.rs @@ -29,11 +29,16 @@ use tracing::{info, info_span, Instrument}; use super::backend::HttpConnError; +#[derive(Debug, Clone)] +pub(crate) struct ConnInfoWithAuth { + pub(crate) conn_info: ConnInfo, + pub(crate) auth: AuthData, +} + #[derive(Debug, Clone)] pub(crate) struct ConnInfo { pub(crate) user_info: ComputeUserInfo, pub(crate) dbname: DbName, - pub(crate) auth: AuthData, } #[derive(Debug, Clone)] @@ -787,7 +792,6 @@ mod tests { options: NeonOptions::default(), }, dbname: "dbname".into(), - auth: AuthData::Password("password".as_bytes().into()), }; let ep_pool = Arc::downgrade( &pool.get_or_create_endpoint_pool(&conn_info.endpoint_cache_key().unwrap()), @@ -845,7 +849,6 @@ mod tests { options: NeonOptions::default(), }, dbname: "dbname".into(), - auth: AuthData::Password("password".as_bytes().into()), }; let ep_pool = Arc::downgrade( &pool.get_or_create_endpoint_pool(&conn_info.endpoint_cache_key().unwrap()), diff --git a/proxy/src/serverless/sql_over_http.rs b/proxy/src/serverless/sql_over_http.rs index 06e540d149..7c78439a0a 100644 --- a/proxy/src/serverless/sql_over_http.rs +++ b/proxy/src/serverless/sql_over_http.rs @@ -60,6 +60,7 @@ use super::backend::PoolingBackend; use super::conn_pool::AuthData; use super::conn_pool::Client; use super::conn_pool::ConnInfo; +use super::conn_pool::ConnInfoWithAuth; use super::http_util::json_response; use super::json::json_to_pg_text; use super::json::pg_text_row_to_json; @@ -148,7 +149,7 @@ fn get_conn_info( ctx: &RequestMonitoring, headers: &HeaderMap, tls: Option<&TlsConfig>, -) -> Result { +) -> 
Result { // HTTP only uses cleartext (for now and likely always) ctx.set_auth_method(crate::context::AuthMethod::Cleartext); @@ -235,11 +236,8 @@ fn get_conn_info( options: options.unwrap_or_default(), }; - Ok(ConnInfo { - user_info, - dbname, - auth, - }) + let conn_info = ConnInfo { user_info, dbname }; + Ok(ConnInfoWithAuth { conn_info, auth }) } // TODO: return different http error codes @@ -523,7 +521,10 @@ async fn handle_inner( // TLS config should be there. let conn_info = get_conn_info(ctx, headers, config.tls_config.as_ref())?; - info!(user = conn_info.user_info.user.as_str(), "credentials"); + info!( + user = conn_info.conn_info.user_info.user.as_str(), + "credentials" + ); // Allow connection pooling only if explicitly requested // or if we have decided that http pool is no longer opt-in @@ -568,20 +569,20 @@ async fn handle_inner( .authenticate_with_password( ctx, &config.authentication_config, - &conn_info.user_info, + &conn_info.conn_info.user_info, pw, ) .await? } AuthData::Jwt(jwt) => { backend - .authenticate_with_jwt(ctx, &conn_info.user_info, jwt) + .authenticate_with_jwt(ctx, &conn_info.conn_info.user_info, jwt) .await? 
} }; let client = backend - .connect_to_compute(ctx, conn_info, keys, !allow_pool) + .connect_to_compute(ctx, conn_info.conn_info, keys, !allow_pool) .await?; // not strictly necessary to mark success here, // but it's just insurance for if we forget it somewhere else diff --git a/safekeeper/Cargo.toml b/safekeeper/Cargo.toml index daf21c70b0..67f32b3cc0 100644 --- a/safekeeper/Cargo.toml +++ b/safekeeper/Cargo.toml @@ -21,7 +21,6 @@ chrono.workspace = true clap = { workspace = true, features = ["derive"] } crc32c.workspace = true fail.workspace = true -git-version.workspace = true hex.workspace = true humantime.workspace = true hyper.workspace = true diff --git a/safekeeper/src/debug_dump.rs b/safekeeper/src/debug_dump.rs index 15b0272cd9..589536c7a8 100644 --- a/safekeeper/src/debug_dump.rs +++ b/safekeeper/src/debug_dump.rs @@ -17,6 +17,7 @@ use postgres_ffi::MAX_SEND_SIZE; use serde::Deserialize; use serde::Serialize; +use postgres_ffi::v14::xlog_utils::{IsPartialXLogFileName, IsXLogFileName}; use sha2::{Digest, Sha256}; use utils::id::NodeId; use utils::id::TenantTimelineId; @@ -51,6 +52,9 @@ pub struct Args { /// Dump full term history. True by default. pub dump_term_history: bool, + /// Dump last modified time of WAL segments. Uses value of `dump_all` by default. + pub dump_wal_last_modified: bool, + /// Filter timelines by tenant_id. 
pub tenant_id: Option, @@ -128,12 +132,19 @@ async fn build_from_tli_dump( None }; + let wal_last_modified = if args.dump_wal_last_modified { + get_wal_last_modified(timeline_dir).ok().flatten() + } else { + None + }; + Timeline { tenant_id: timeline.ttid.tenant_id, timeline_id: timeline.ttid.timeline_id, control_file, memory, disk_content, + wal_last_modified, } } @@ -156,6 +167,7 @@ pub struct Timeline { pub control_file: Option, pub memory: Option, pub disk_content: Option, + pub wal_last_modified: Option>, } #[derive(Debug, Serialize, Deserialize)] @@ -302,6 +314,27 @@ fn build_file_info(entry: DirEntry) -> Result { }) } +/// Get highest modified time of WAL segments in the directory. +fn get_wal_last_modified(path: &Utf8Path) -> Result>> { + let mut res = None; + for entry in fs::read_dir(path)? { + if entry.is_err() { + continue; + } + let entry = entry?; + /* Ignore files that are not XLOG segments */ + let fname = entry.file_name(); + if !IsXLogFileName(&fname) && !IsPartialXLogFileName(&fname) { + continue; + } + + let metadata = entry.metadata()?; + let modified: DateTime = DateTime::from(metadata.modified()?); + res = std::cmp::max(res, Some(modified)); + } + Ok(res) +} + /// Converts SafeKeeperConf to Config, filtering out the fields that are not /// supposed to be exposed. 
fn build_config(config: SafeKeeperConf) -> Config { diff --git a/safekeeper/src/http/openapi_spec.yaml b/safekeeper/src/http/openapi_spec.yaml index 70999853c2..3f14075345 100644 --- a/safekeeper/src/http/openapi_spec.yaml +++ b/safekeeper/src/http/openapi_spec.yaml @@ -1,7 +1,11 @@ openapi: "3.0.2" info: title: Safekeeper control API + description: Neon Safekeeper API version: "1.0" + license: + name: "Apache" + url: https://github.com/neondatabase/neon/blob/main/LICENSE servers: @@ -386,6 +390,12 @@ components: msg: type: string + NotFoundError: + type: object + properties: + msg: + type: string + responses: # diff --git a/safekeeper/src/http/routes.rs b/safekeeper/src/http/routes.rs index e482edea55..b4590fe3e5 100644 --- a/safekeeper/src/http/routes.rs +++ b/safekeeper/src/http/routes.rs @@ -481,6 +481,7 @@ async fn dump_debug_handler(mut request: Request) -> Result let mut dump_memory: Option = None; let mut dump_disk_content: Option = None; let mut dump_term_history: Option = None; + let mut dump_wal_last_modified: Option = None; let mut tenant_id: Option = None; let mut timeline_id: Option = None; @@ -494,6 +495,7 @@ async fn dump_debug_handler(mut request: Request) -> Result "dump_memory" => dump_memory = Some(parse_kv_str(&k, &v)?), "dump_disk_content" => dump_disk_content = Some(parse_kv_str(&k, &v)?), "dump_term_history" => dump_term_history = Some(parse_kv_str(&k, &v)?), + "dump_wal_last_modified" => dump_wal_last_modified = Some(parse_kv_str(&k, &v)?), "tenant_id" => tenant_id = Some(parse_kv_str(&k, &v)?), "timeline_id" => timeline_id = Some(parse_kv_str(&k, &v)?), _ => Err(ApiError::BadRequest(anyhow::anyhow!( @@ -508,6 +510,7 @@ async fn dump_debug_handler(mut request: Request) -> Result let dump_memory = dump_memory.unwrap_or(dump_all); let dump_disk_content = dump_disk_content.unwrap_or(dump_all); let dump_term_history = dump_term_history.unwrap_or(true); + let dump_wal_last_modified = dump_wal_last_modified.unwrap_or(dump_all); let args = 
debug_dump::Args { dump_all, @@ -515,6 +518,7 @@ async fn dump_debug_handler(mut request: Request) -> Result dump_memory, dump_disk_content, dump_term_history, + dump_wal_last_modified, tenant_id, timeline_id, }; diff --git a/safekeeper/src/pull_timeline.rs b/safekeeper/src/pull_timeline.rs index 64585f5edc..c772ae6de7 100644 --- a/safekeeper/src/pull_timeline.rs +++ b/safekeeper/src/pull_timeline.rs @@ -278,7 +278,7 @@ impl WalResidentTimeline { } /// pull_timeline request body. -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Deserialize)] pub struct Request { pub tenant_id: TenantId, pub timeline_id: TimelineId, @@ -293,7 +293,7 @@ pub struct Response { } /// Response for debug dump request. -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Deserialize)] pub struct DebugDumpResponse { pub start_time: DateTime, pub finish_time: DateTime, diff --git a/safekeeper/src/wal_storage.rs b/safekeeper/src/wal_storage.rs index 46c260901d..6e7da94973 100644 --- a/safekeeper/src/wal_storage.rs +++ b/safekeeper/src/wal_storage.rs @@ -539,20 +539,17 @@ async fn remove_segments_from_disk( while let Some(entry) = entries.next_entry().await? 
{ let entry_path = entry.path(); let fname = entry_path.file_name().unwrap(); - - if let Some(fname_str) = fname.to_str() { - /* Ignore files that are not XLOG segments */ - if !IsXLogFileName(fname_str) && !IsPartialXLogFileName(fname_str) { - continue; - } - let (segno, _) = XLogFromFileName(fname_str, wal_seg_size); - if remove_predicate(segno) { - remove_file(entry_path).await?; - n_removed += 1; - min_removed = min(min_removed, segno); - max_removed = max(max_removed, segno); - REMOVED_WAL_SEGMENTS.inc(); - } + /* Ignore files that are not XLOG segments */ + if !IsXLogFileName(fname) && !IsPartialXLogFileName(fname) { + continue; + } + let (segno, _) = XLogFromFileName(fname, wal_seg_size)?; + if remove_predicate(segno) { + remove_file(entry_path).await?; + n_removed += 1; + min_removed = min(min_removed, segno); + max_removed = max(max_removed, segno); + REMOVED_WAL_SEGMENTS.inc(); } } diff --git a/storage_broker/Cargo.toml b/storage_broker/Cargo.toml index 82ec0aa272..5359f586e4 100644 --- a/storage_broker/Cargo.toml +++ b/storage_broker/Cargo.toml @@ -15,7 +15,6 @@ const_format.workspace = true futures.workspace = true futures-core.workspace = true futures-util.workspace = true -git-version.workspace = true humantime.workspace = true hyper = { workspace = true, features = ["full"] } once_cell.workspace = true diff --git a/storage_controller/Cargo.toml b/storage_controller/Cargo.toml index a96d64e096..9ed0501026 100644 --- a/storage_controller/Cargo.toml +++ b/storage_controller/Cargo.toml @@ -20,7 +20,6 @@ chrono.workspace = true clap.workspace = true fail.workspace = true futures.workspace = true -git-version.workspace = true hex.workspace = true hyper.workspace = true humantime.workspace = true diff --git a/storage_controller/src/compute_hook.rs b/storage_controller/src/compute_hook.rs index c46539485c..bafae1f551 100644 --- a/storage_controller/src/compute_hook.rs +++ b/storage_controller/src/compute_hook.rs @@ -71,6 +71,37 @@ impl ComputeHookTenant { } 
} + fn is_sharded(&self) -> bool { + matches!(self, ComputeHookTenant::Sharded(_)) + } + + /// Clear compute hook state for the specified shard. + /// Only valid for [`ComputeHookTenant::Sharded`] instances. + fn remove_shard(&mut self, tenant_shard_id: TenantShardId, stripe_size: ShardStripeSize) { + match self { + ComputeHookTenant::Sharded(sharded) => { + if sharded.stripe_size != stripe_size + || sharded.shard_count != tenant_shard_id.shard_count + { + tracing::warn!("Shard split detected while handling detach") + } + + let shard_idx = sharded.shards.iter().position(|(shard_number, _node_id)| { + *shard_number == tenant_shard_id.shard_number + }); + + if let Some(shard_idx) = shard_idx { + sharded.shards.remove(shard_idx); + } else { + tracing::warn!("Shard not found while handling detach") + } + } + ComputeHookTenant::Unsharded(_) => { + unreachable!("Detach of unsharded tenants is handled externally"); + } + } + } + /// Set one shard's location. If stripe size or shard count have changed, Self is reset /// and drops existing content. fn update( @@ -614,6 +645,36 @@ impl ComputeHook { self.notify_execute(maybe_send_result, tenant_shard_id, cancel) .await } + + /// Reflect a detach for a particular shard in the compute hook state. + /// + /// The goal is to avoid sending compute notifications with stale information (i.e. + /// including detached pageservers).
+ #[tracing::instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()))] + pub(super) fn handle_detach( + &self, + tenant_shard_id: TenantShardId, + stripe_size: ShardStripeSize, + ) { + use std::collections::hash_map::Entry; + + let mut state_locked = self.state.lock().unwrap(); + match state_locked.entry(tenant_shard_id.tenant_id) { + Entry::Vacant(_) => { + tracing::warn!("Compute hook tenant not found for detach"); + } + Entry::Occupied(mut e) => { + let sharded = e.get().is_sharded(); + if !sharded { + e.remove(); + } else { + e.get_mut().remove_shard(tenant_shard_id, stripe_size); + } + + tracing::debug!("Compute hook handled shard detach"); + } + } + } } #[cfg(test)] diff --git a/storage_controller/src/http.rs b/storage_controller/src/http.rs index 1745bf5575..4dd8badd03 100644 --- a/storage_controller/src/http.rs +++ b/storage_controller/src/http.rs @@ -515,7 +515,7 @@ async fn handle_tenant_timeline_passthrough( tracing::info!("Proxying request for tenant {} ({})", tenant_id, path); // Find the node that holds shard zero - let (node, tenant_shard_id) = service.tenant_shard0_node(tenant_id)?; + let (node, tenant_shard_id) = service.tenant_shard0_node(tenant_id).await?; // Callers will always pass an unsharded tenant ID. Before proxying, we must // rewrite this to a shard-aware shard zero ID. @@ -545,10 +545,10 @@ async fn handle_tenant_timeline_passthrough( let _timer = latency.start_timer(labels.clone()); let client = mgmt_api::Client::new(node.base_url(), service.get_config().jwt_token.as_deref()); - let resp = client.get_raw(path).await.map_err(|_e| - // FIXME: give APiError a proper Unavailable variant. We return 503 here because - // if we can't successfully send a request to the pageserver, we aren't available. 
- ApiError::ShuttingDown)?; + let resp = client.get_raw(path).await.map_err(|e| + // We return 503 here because if we can't successfully send a request to the pageserver, + // either we aren't available or the pageserver is unavailable. + ApiError::ResourceUnavailable(format!("Error sending pageserver API request to {node}: {e}").into()))?; if !resp.status().is_success() { let error_counter = &METRICS_REGISTRY @@ -557,6 +557,19 @@ async fn handle_tenant_timeline_passthrough( error_counter.inc(labels); } + // Transform 404 into 503 if we raced with a migration + if resp.status() == reqwest::StatusCode::NOT_FOUND { + // Look up node again: if we migrated it will be different + let (new_node, _tenant_shard_id) = service.tenant_shard0_node(tenant_id).await?; + if new_node.get_id() != node.get_id() { + // Rather than retry here, send the client a 503 to prompt a retry: this matches + // the pageserver's use of 503, and all clients calling this API should retry on 503. + return Err(ApiError::ResourceUnavailable( + format!("Pageserver {node} returned 404, was migrated to {new_node}").into(), + )); + } + } + // We have a reqest::Response, would like a http::Response let mut builder = hyper::Response::builder().status(map_reqwest_hyper_status(resp.status())?); for (k, v) in resp.headers() { @@ -1849,7 +1862,7 @@ pub fn make_router( RequestName("v1_tenant_timeline"), ) }) - .post( + .put( "/v1/tenant/:tenant_id/timeline/:timeline_id/archival_config", |r| { tenant_service_handler( diff --git a/storage_controller/src/node.rs b/storage_controller/src/node.rs index cb9ce10d23..4cc9b0070d 100644 --- a/storage_controller/src/node.rs +++ b/storage_controller/src/node.rs @@ -2,8 +2,8 @@ use std::{str::FromStr, time::Duration}; use pageserver_api::{ controller_api::{ - NodeAvailability, NodeDescribeResponse, NodeRegisterRequest, NodeSchedulingPolicy, - TenantLocateResponseShard, + AvailabilityZone, NodeAvailability, NodeDescribeResponse, NodeRegisterRequest, + NodeSchedulingPolicy, 
TenantLocateResponseShard, }, shard::TenantShardId, }; @@ -36,7 +36,7 @@ pub(crate) struct Node { listen_pg_addr: String, listen_pg_port: u16, - availability_zone_id: String, + availability_zone_id: AvailabilityZone, // This cancellation token means "stop any RPCs in flight to this node, and don't start // any more". It is not related to process shutdown. @@ -64,8 +64,8 @@ impl Node { } #[allow(unused)] - pub(crate) fn get_availability_zone_id(&self) -> &str { - self.availability_zone_id.as_str() + pub(crate) fn get_availability_zone_id(&self) -> &AvailabilityZone { + &self.availability_zone_id } pub(crate) fn get_scheduling(&self) -> NodeSchedulingPolicy { @@ -181,7 +181,7 @@ impl Node { listen_http_port: u16, listen_pg_addr: String, listen_pg_port: u16, - availability_zone_id: String, + availability_zone_id: AvailabilityZone, ) -> Self { Self { id, @@ -204,7 +204,7 @@ impl Node { listen_http_port: self.listen_http_port as i32, listen_pg_addr: self.listen_pg_addr.clone(), listen_pg_port: self.listen_pg_port as i32, - availability_zone_id: self.availability_zone_id.clone(), + availability_zone_id: self.availability_zone_id.0.clone(), } } @@ -219,7 +219,7 @@ impl Node { listen_http_port: np.listen_http_port as u16, listen_pg_addr: np.listen_pg_addr, listen_pg_port: np.listen_pg_port as u16, - availability_zone_id: np.availability_zone_id, + availability_zone_id: AvailabilityZone(np.availability_zone_id), cancel: CancellationToken::new(), } } diff --git a/storage_controller/src/pageserver_client.rs b/storage_controller/src/pageserver_client.rs index 961a1f78dd..b19cbc4fa3 100644 --- a/storage_controller/src/pageserver_client.rs +++ b/storage_controller/src/pageserver_client.rs @@ -238,7 +238,7 @@ impl PageserverClient { ) -> Result<()> { measured_request!( "timeline_archival_config", - crate::metrics::Method::Post, + crate::metrics::Method::Put, &self.node_id_label, self.inner .timeline_archival_config(tenant_shard_id, timeline_id, req) diff --git 
a/storage_controller/src/persistence.rs b/storage_controller/src/persistence.rs index 1dc1040d96..14cc51240d 100644 --- a/storage_controller/src/persistence.rs +++ b/storage_controller/src/persistence.rs @@ -9,6 +9,7 @@ use diesel::pg::PgConnection; use diesel::prelude::*; use diesel::Connection; use itertools::Itertools; +use pageserver_api::controller_api::AvailabilityZone; use pageserver_api::controller_api::MetadataHealthRecord; use pageserver_api::controller_api::ShardSchedulingPolicy; use pageserver_api::controller_api::{NodeSchedulingPolicy, PlacementPolicy}; @@ -667,8 +668,8 @@ impl Persistence { pub(crate) async fn set_tenant_shard_preferred_azs( &self, - preferred_azs: Vec<(TenantShardId, String)>, - ) -> DatabaseResult> { + preferred_azs: Vec<(TenantShardId, AvailabilityZone)>, + ) -> DatabaseResult> { use crate::schema::tenant_shards::dsl::*; self.with_measured_conn(DatabaseOperation::SetPreferredAzs, move |conn| { @@ -679,7 +680,7 @@ impl Persistence { .filter(tenant_id.eq(tenant_shard_id.tenant_id.to_string())) .filter(shard_number.eq(tenant_shard_id.shard_number.0 as i32)) .filter(shard_count.eq(tenant_shard_id.shard_count.literal() as i32)) - .set(preferred_az_id.eq(preferred_az)) + .set(preferred_az_id.eq(preferred_az.0.clone())) .execute(conn)?; if updated == 1 { diff --git a/storage_controller/src/reconciler.rs b/storage_controller/src/reconciler.rs index 83b7b2b4f2..2c42da4043 100644 --- a/storage_controller/src/reconciler.rs +++ b/storage_controller/src/reconciler.rs @@ -463,7 +463,7 @@ impl Reconciler { for (timeline_id, baseline_lsn) in &baseline { match latest.get(timeline_id) { Some(latest_lsn) => { - tracing::info!("🕑 LSN origin {baseline_lsn} vs destination {latest_lsn}"); + tracing::info!(timeline_id = %timeline_id, "🕑 LSN origin {baseline_lsn} vs destination {latest_lsn}"); if latest_lsn < baseline_lsn { any_behind = true; } @@ -541,6 +541,8 @@ impl Reconciler { } } + pausable_failpoint!("reconciler-live-migrate-pre-generation-inc"); + 
// Increment generation before attaching to new pageserver self.generation = Some( self.persistence @@ -617,6 +619,8 @@ impl Reconciler { }, ); + pausable_failpoint!("reconciler-live-migrate-post-detach"); + tracing::info!("🔁 Switching to AttachedSingle mode on node {dest_ps}",); let dest_final_conf = build_location_config( &self.shard, @@ -820,6 +824,16 @@ impl Reconciler { self.location_config(&node, conf, None, false).await?; } + // The condition below identifies a detach. We must have no attached intent and + // must have been attached to something previously. Pass this information to + // the [`ComputeHook`] such that it can update its tenant-wide state. + if self.intent.attached.is_none() && !self.detach.is_empty() { + // TODO: Consider notifying control plane about detaches. This would avoid situations + // where the compute tries to start-up with a stale set of pageservers. + self.compute_hook + .handle_detach(self.tenant_shard_id, self.shard.stripe_size); + } + failpoint_support::sleep_millis_async!("sleep-on-reconcile-epilogue"); Ok(()) diff --git a/storage_controller/src/scheduler.rs b/storage_controller/src/scheduler.rs index deb5f27226..2414d95eb8 100644 --- a/storage_controller/src/scheduler.rs +++ b/storage_controller/src/scheduler.rs @@ -1,8 +1,8 @@ use crate::{node::Node, tenant_shard::TenantShard}; use itertools::Itertools; -use pageserver_api::models::PageserverUtilization; +use pageserver_api::{controller_api::AvailabilityZone, models::PageserverUtilization}; use serde::Serialize; -use std::collections::HashMap; +use std::{collections::HashMap, fmt::Debug}; use utils::{http::error::ApiError, id::NodeId}; /// Scenarios in which we cannot find a suitable location for a tenant shard @@ -27,17 +27,230 @@ pub enum MaySchedule { } #[derive(Serialize)] -struct SchedulerNode { +pub(crate) struct SchedulerNode { /// How many shards are currently scheduled on this node, via their [`crate::tenant_shard::IntentState`]. 
shard_count: usize, /// How many shards are currently attached on this node, via their [`crate::tenant_shard::IntentState`]. attached_shard_count: usize, + /// Availability zone id in which the node resides + az: AvailabilityZone, /// Whether this node is currently elegible to have new shards scheduled (this is derived /// from a node's availability state and scheduling policy). may_schedule: MaySchedule, } +pub(crate) trait NodeSchedulingScore: Debug + Ord + Copy + Sized { + fn generate( + node_id: &NodeId, + node: &mut SchedulerNode, + preferred_az: &Option, + context: &ScheduleContext, + ) -> Option; + fn is_overloaded(&self) -> bool; + fn node_id(&self) -> NodeId; +} + +pub(crate) trait ShardTag { + type Score: NodeSchedulingScore; +} + +pub(crate) struct AttachedShardTag {} +impl ShardTag for AttachedShardTag { + type Score = NodeAttachmentSchedulingScore; +} + +pub(crate) struct SecondaryShardTag {} +impl ShardTag for SecondaryShardTag { + type Score = NodeSecondarySchedulingScore; +} + +#[derive(PartialEq, Eq, Debug, Clone, Copy)] +enum AzMatch { + Yes, + No, + Unknown, +} + +impl AzMatch { + fn new(node_az: &AvailabilityZone, shard_preferred_az: Option<&AvailabilityZone>) -> Self { + match shard_preferred_az { + Some(preferred_az) if preferred_az == node_az => Self::Yes, + Some(_preferred_az) => Self::No, + None => Self::Unknown, + } + } +} + +#[derive(PartialEq, Eq, Debug, Clone, Copy)] +struct AttachmentAzMatch(AzMatch); + +impl Ord for AttachmentAzMatch { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + // Lower scores indicate a more suitable node. + // Note that we prefer a node for which we don't have + // info to a node which we are certain doesn't match the + // preferred AZ of the shard. 
+ let az_match_score = |az_match: &AzMatch| match az_match { + AzMatch::Yes => 0, + AzMatch::Unknown => 1, + AzMatch::No => 2, + }; + + az_match_score(&self.0).cmp(&az_match_score(&other.0)) + } +} + +impl PartialOrd for AttachmentAzMatch { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +#[derive(PartialEq, Eq, Debug, Clone, Copy)] +struct SecondaryAzMatch(AzMatch); + +impl Ord for SecondaryAzMatch { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + // Lower scores indicate a more suitable node. + // For secondary locations we wish to avoid the preferred AZ + // of the shard. + let az_match_score = |az_match: &AzMatch| match az_match { + AzMatch::No => 0, + AzMatch::Unknown => 1, + AzMatch::Yes => 2, + }; + + az_match_score(&self.0).cmp(&az_match_score(&other.0)) + } +} + +impl PartialOrd for SecondaryAzMatch { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +/// Scheduling score of a given node for shard attachments. +/// Lower scores indicate more suitable nodes. +/// Ordering is given by member declaration order (top to bottom). +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] +pub(crate) struct NodeAttachmentSchedulingScore { + /// The number of shards belonging to the tenant currently being + /// scheduled that are attached to this node. + affinity_score: AffinityScore, + /// Flag indicating whether this node matches the preferred AZ + /// of the shard. For equal affinity scores, nodes in the matching AZ + /// are considered first. + az_match: AttachmentAzMatch, + /// Size of [`ScheduleContext::attached_nodes`] for the current node. + /// This normally tracks the number of attached shards belonging to the + /// tenant being scheduled that are already on this node. + attached_shards_in_context: usize, + /// Utilisation score that combines shard count and disk utilisation + utilization_score: u64, + /// Total number of shards attached to this node. 
When nodes have identical utilisation, this + /// acts as an anti-affinity between attached shards. + total_attached_shard_count: usize, + /// Convenience to make selection deterministic in tests and empty systems + node_id: NodeId, +} + +impl NodeSchedulingScore for NodeAttachmentSchedulingScore { + fn generate( + node_id: &NodeId, + node: &mut SchedulerNode, + preferred_az: &Option, + context: &ScheduleContext, + ) -> Option { + let utilization = match &mut node.may_schedule { + MaySchedule::Yes(u) => u, + MaySchedule::No => { + return None; + } + }; + + Some(Self { + affinity_score: context + .nodes + .get(node_id) + .copied() + .unwrap_or(AffinityScore::FREE), + az_match: AttachmentAzMatch(AzMatch::new(&node.az, preferred_az.as_ref())), + attached_shards_in_context: context.attached_nodes.get(node_id).copied().unwrap_or(0), + utilization_score: utilization.cached_score(), + total_attached_shard_count: node.attached_shard_count, + node_id: *node_id, + }) + } + + fn is_overloaded(&self) -> bool { + PageserverUtilization::is_overloaded(self.utilization_score) + } + + fn node_id(&self) -> NodeId { + self.node_id + } +} + +/// Scheduling score of a given node for shard secondaries. +/// Lower scores indicate more suitable nodes. +/// Ordering is given by member declaration order (top to bottom). +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] +pub(crate) struct NodeSecondarySchedulingScore { + /// Flag indicating whether this node matches the preferred AZ + /// of the shard. For secondary locations we wish to avoid nodes in + /// the preferred AZ of the shard, since that's where the attached location + /// should be scheduled and having the secondary in the same AZ is bad for HA. + az_match: SecondaryAzMatch, + /// The number of shards belonging to the tenant currently being + /// scheduled that are attached to this node.
+ affinity_score: AffinityScore, + /// Utilisation score that combines shard count and disk utilisation + utilization_score: u64, + /// Total number of shards attached to this node. When nodes have identical utilisation, this + /// acts as an anti-affinity between attached shards. + total_attached_shard_count: usize, + /// Convenience to make selection deterministic in tests and empty systems + node_id: NodeId, +} + +impl NodeSchedulingScore for NodeSecondarySchedulingScore { + fn generate( + node_id: &NodeId, + node: &mut SchedulerNode, + preferred_az: &Option, + context: &ScheduleContext, + ) -> Option { + let utilization = match &mut node.may_schedule { + MaySchedule::Yes(u) => u, + MaySchedule::No => { + return None; + } + }; + + Some(Self { + az_match: SecondaryAzMatch(AzMatch::new(&node.az, preferred_az.as_ref())), + affinity_score: context + .nodes + .get(node_id) + .copied() + .unwrap_or(AffinityScore::FREE), + utilization_score: utilization.cached_score(), + total_attached_shard_count: node.attached_shard_count, + node_id: *node_id, + }) + } + + fn is_overloaded(&self) -> bool { + PageserverUtilization::is_overloaded(self.utilization_score) + } + + fn node_id(&self) -> NodeId { + self.node_id + } +} + impl PartialEq for SchedulerNode { fn eq(&self, other: &Self) -> bool { let may_schedule_matches = matches!( @@ -48,6 +261,7 @@ impl PartialEq for SchedulerNode { may_schedule_matches && self.shard_count == other.shard_count && self.attached_shard_count == other.attached_shard_count + && self.az == other.az } } @@ -162,6 +376,7 @@ impl Scheduler { shard_count: 0, attached_shard_count: 0, may_schedule: node.may_schedule(), + az: node.get_availability_zone_id().clone(), }, ); } @@ -188,6 +403,7 @@ impl Scheduler { shard_count: 0, attached_shard_count: 0, may_schedule: node.may_schedule(), + az: node.get_availability_zone_id().clone(), }, ); } @@ -366,6 +582,7 @@ impl Scheduler { shard_count: 0, attached_shard_count: 0, may_schedule: node.may_schedule(), + az: 
node.get_availability_zone_id().clone(), }); } } @@ -406,6 +623,29 @@ impl Scheduler { node.and_then(|(node_id, may_schedule)| if may_schedule { Some(node_id) } else { None }) } + /// Compute a scheduling score for each node that the scheduler knows of + /// minus a set of hard excluded nodes. + fn compute_node_scores( + &mut self, + hard_exclude: &[NodeId], + preferred_az: &Option, + context: &ScheduleContext, + ) -> Vec + where + Score: NodeSchedulingScore, + { + self.nodes + .iter_mut() + .filter_map(|(k, v)| { + if hard_exclude.contains(k) { + None + } else { + Score::generate(k, v, preferred_az, context) + } + }) + .collect() + } + /// hard_exclude: it is forbidden to use nodes in this list, typically becacuse they /// are already in use by this shard -- we use this to avoid picking the same node /// as both attached and secondary location. This is a hard constraint: if we cannot @@ -415,29 +655,18 @@ impl Scheduler { /// to their anti-affinity score. We use this to prefeer to avoid placing shards in /// the same tenant on the same node. This is a soft constraint: the context will never /// cause us to fail to schedule a shard. - pub(crate) fn schedule_shard( + pub(crate) fn schedule_shard( &mut self, hard_exclude: &[NodeId], + preferred_az: &Option, context: &ScheduleContext, ) -> Result { if self.nodes.is_empty() { return Err(ScheduleError::NoPageservers); } - let mut scores: Vec<(NodeId, AffinityScore, u64, usize)> = self - .nodes - .iter_mut() - .filter_map(|(k, v)| match &mut v.may_schedule { - MaySchedule::No => None, - MaySchedule::Yes(_) if hard_exclude.contains(k) => None, - MaySchedule::Yes(utilization) => Some(( - *k, - context.nodes.get(k).copied().unwrap_or(AffinityScore::FREE), - utilization.cached_score(), - v.attached_shard_count, - )), - }) - .collect(); + let mut scores = + self.compute_node_scores::(hard_exclude, preferred_az, context); // Exclude nodes whose utilization is critically high, if there are alternatives available.
This will // cause us to violate affinity rules if it is necessary to avoid critically overloading nodes: for example @@ -445,20 +674,18 @@ impl Scheduler { // overloaded. let non_overloaded_scores = scores .iter() - .filter(|i| !PageserverUtilization::is_overloaded(i.2)) + .filter(|i| !i.is_overloaded()) .copied() .collect::>(); if !non_overloaded_scores.is_empty() { scores = non_overloaded_scores; } - // Sort by, in order of precedence: - // 1st: Affinity score. We should never pick a higher-score node if a lower-score node is available - // 2nd: Utilization score (this combines shard count and disk utilization) - // 3rd: Attached shard count. When nodes have identical utilization (e.g. when populating some - // empty nodes), this acts as an anti-affinity between attached shards. - // 4th: Node ID. This is a convenience to make selection deterministic in tests and empty systems. - scores.sort_by_key(|i| (i.1, i.2, i.3, i.0)); + // Sort the nodes by score. The one with the lowest scores will be the preferred node. + // Refer to [`NodeAttachmentSchedulingScore`] for attached locations and + // [`NodeSecondarySchedulingScore`] for secondary locations to understand how the nodes + // are ranked. + scores.sort(); if scores.is_empty() { // After applying constraints, no pageservers were left. @@ -481,12 +708,12 @@ impl Scheduler { } // Lowest score wins - let node_id = scores.first().unwrap().0; + let node_id = scores.first().unwrap().node_id(); if !matches!(context.mode, ScheduleMode::Speculative) { tracing::info!( "scheduler selected node {node_id} (elegible nodes {:?}, hard exclude: {hard_exclude:?}, soft exclude: {context:?})", - scores.iter().map(|i| i.0 .0).collect::>() + scores.iter().map(|i| i.node_id().0).collect::>() ); } @@ -496,6 +723,12 @@ impl Scheduler { Ok(node_id) } + /// Selects any available node. This is suitable for performing background work (e.g. S3 + /// deletions). 
+ pub(crate) fn any_available_node(&mut self) -> Result { + self.schedule_shard::(&[], &None, &ScheduleContext::default()) + } + /// Unit test access to internal state #[cfg(test)] pub(crate) fn get_node_shard_count(&self, node_id: NodeId) -> usize { @@ -512,13 +745,22 @@ impl Scheduler { pub(crate) mod test_utils { use crate::node::Node; - use pageserver_api::{controller_api::NodeAvailability, models::utilization::test_utilization}; + use pageserver_api::{ + controller_api::{AvailabilityZone, NodeAvailability}, + models::utilization::test_utilization, + }; use std::collections::HashMap; use utils::id::NodeId; + /// Test helper: synthesize the requested number of nodes, all in active state. /// /// Node IDs start at one. - pub(crate) fn make_test_nodes(n: u64) -> HashMap { + /// + /// The `azs` argument specifies the list of availability zones which will be assigned + /// to nodes in round-robin fashion. If empty, a default AZ is assigned. + pub(crate) fn make_test_nodes(n: u64, azs: &[AvailabilityZone]) -> HashMap { + let mut az_iter = azs.iter().cycle(); + (1..n + 1) .map(|i| { (NodeId(i), { @@ -528,7 +770,10 @@ pub(crate) mod test_utils { 80 + i as u16, format!("pghost-{i}"), 5432 + i as u16, - "test-az".to_string(), + az_iter + .next() + .cloned() + .unwrap_or(AvailabilityZone("test-az".to_string())), ); node.set_availability(NodeAvailability::Active(test_utilization::simple(0, 0))); assert!(node.is_available()); @@ -548,7 +793,7 @@ mod tests { use crate::tenant_shard::IntentState; #[test] fn scheduler_basic() -> anyhow::Result<()> { - let nodes = test_utils::make_test_nodes(2); + let nodes = test_utils::make_test_nodes(2, &[]); let mut scheduler = Scheduler::new(nodes.values()); let mut t1_intent = IntentState::new(); @@ -556,9 +801,9 @@ mod tests { let context = ScheduleContext::default(); - let scheduled = scheduler.schedule_shard(&[], &context)?; + let scheduled = scheduler.schedule_shard::(&[], &None, &context)?; t1_intent.set_attached(&mut scheduler,
Some(scheduled)); - let scheduled = scheduler.schedule_shard(&[], &context)?; + let scheduled = scheduler.schedule_shard::(&[], &None, &context)?; t2_intent.set_attached(&mut scheduler, Some(scheduled)); assert_eq!(scheduler.get_node_shard_count(NodeId(1)), 1); @@ -567,7 +812,11 @@ mod tests { assert_eq!(scheduler.get_node_shard_count(NodeId(2)), 1); assert_eq!(scheduler.get_node_attached_shard_count(NodeId(2)), 1); - let scheduled = scheduler.schedule_shard(&t1_intent.all_pageservers(), &context)?; + let scheduled = scheduler.schedule_shard::( + &t1_intent.all_pageservers(), + &None, + &context, + )?; t1_intent.push_secondary(&mut scheduler, scheduled); assert_eq!(scheduler.get_node_shard_count(NodeId(1)), 1); @@ -607,7 +856,7 @@ mod tests { #[test] /// Test the PageserverUtilization's contribution to scheduling algorithm fn scheduler_utilization() { - let mut nodes = test_utils::make_test_nodes(3); + let mut nodes = test_utils::make_test_nodes(3, &[]); let mut scheduler = Scheduler::new(nodes.values()); // Need to keep these alive because they contribute to shard counts via RAII @@ -621,7 +870,9 @@ mod tests { scheduler: &mut Scheduler, context: &ScheduleContext, ) { - let scheduled = scheduler.schedule_shard(&[], context).unwrap(); + let scheduled = scheduler + .schedule_shard::(&[], &None, context) + .unwrap(); let mut intent = IntentState::new(); intent.set_attached(scheduler, Some(scheduled)); scheduled_intents.push(intent); @@ -729,4 +980,98 @@ mod tests { intent.clear(&mut scheduler); } } + + #[test] + /// A simple test that showcases AZ-aware scheduling and its interaction with + /// affinity scores. 
+ fn az_scheduling() { + let az_a_tag = AvailabilityZone("az-a".to_string()); + let az_b_tag = AvailabilityZone("az-b".to_string()); + + let nodes = test_utils::make_test_nodes(3, &[az_a_tag.clone(), az_b_tag.clone()]); + let mut scheduler = Scheduler::new(nodes.values()); + + // Need to keep these alive because they contribute to shard counts via RAII + let mut scheduled_intents = Vec::new(); + + let mut context = ScheduleContext::default(); + + fn assert_scheduler_chooses( + expect_node: NodeId, + preferred_az: Option, + scheduled_intents: &mut Vec, + scheduler: &mut Scheduler, + context: &mut ScheduleContext, + ) { + let scheduled = scheduler + .schedule_shard::(&[], &preferred_az, context) + .unwrap(); + let mut intent = IntentState::new(); + intent.set_attached(scheduler, Some(scheduled)); + scheduled_intents.push(intent); + assert_eq!(scheduled, expect_node); + + context.avoid(&[scheduled]); + } + + assert_scheduler_chooses::( + NodeId(1), + Some(az_a_tag.clone()), + &mut scheduled_intents, + &mut scheduler, + &mut context, + ); + + // Node 2 and 3 have affinity score equal to 0, but node 3 + // is in "az-a" so we prefer that. + assert_scheduler_chooses::( + NodeId(3), + Some(az_a_tag.clone()), + &mut scheduled_intents, + &mut scheduler, + &mut context, + ); + + // Node 2 is not in "az-a", but it has the lowest affinity so we prefer that. + assert_scheduler_chooses::( + NodeId(2), + Some(az_a_tag.clone()), + &mut scheduled_intents, + &mut scheduler, + &mut context, + ); + + // Avoid nodes in "az-a" for the secondary location. + assert_scheduler_chooses::( + NodeId(2), + Some(az_a_tag.clone()), + &mut scheduled_intents, + &mut scheduler, + &mut context, + ); + + // Avoid nodes in "az-b" for the secondary location. + // Nodes 1 and 3 are identically loaded, so prefer the lowest node id. 
+ assert_scheduler_chooses::( + NodeId(1), + Some(az_b_tag.clone()), + &mut scheduled_intents, + &mut scheduler, + &mut context, + ); + + // Avoid nodes in "az-b" for the secondary location. + // Node 3 has lower affinity score than 1, so prefer that. + assert_scheduler_chooses::( + NodeId(3), + Some(az_b_tag.clone()), + &mut scheduled_intents, + &mut scheduler, + &mut context, + ); + + for mut intent in scheduled_intents { + intent.clear(&mut scheduler); + } + } } diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index be3efaf688..a5e0129684 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -3,6 +3,7 @@ use std::{ borrow::Cow, cmp::Ordering, collections::{BTreeMap, HashMap, HashSet}, + error::Error, ops::Deref, path::PathBuf, str::FromStr, @@ -218,9 +219,16 @@ fn passthrough_api_error(node: &Node, e: mgmt_api::Error) -> ApiError { format!("{node} error receiving error body: {str}").into(), ) } - mgmt_api::Error::ReceiveBody(str) => { - // Presume errors receiving body are connectivity/availability issues - ApiError::ResourceUnavailable(format!("{node} error receiving body: {str}").into()) + mgmt_api::Error::ReceiveBody(err) if err.is_decode() => { + // Return 500 for decoding errors. 
+ ApiError::InternalServerError(anyhow::Error::from(err).context("error decoding body")) + } + mgmt_api::Error::ReceiveBody(err) => { + // Presume errors receiving body are connectivity/availability issues except for decoding errors + let src_str = err.source().map(|e| e.to_string()).unwrap_or_default(); + ApiError::ResourceUnavailable( + format!("{node} error receiving error body: {err} {}", src_str).into(), + ) } mgmt_api::Error::ApiError(StatusCode::NOT_FOUND, msg) => { ApiError::NotFound(anyhow::anyhow!(format!("{node}: {msg}")).into()) @@ -1257,6 +1265,8 @@ impl Service { #[cfg(feature = "testing")] { + use pageserver_api::controller_api::AvailabilityZone; + // Hack: insert scheduler state for all nodes referenced by shards, as compatibility // tests only store the shards, not the nodes. The nodes will be loaded shortly // after when pageservers start up and register. @@ -1274,7 +1284,7 @@ impl Service { 123, "".to_string(), 123, - "test_az".to_string(), + AvailabilityZone("test_az".to_string()), ); scheduler.node_upsert(&node); @@ -2091,7 +2101,7 @@ impl Service { let az_id = locked .nodes .get(&resp.node_id) - .map(|n| n.get_availability_zone_id().to_string())?; + .map(|n| n.get_availability_zone_id().clone())?; Some((resp.shard_id, az_id)) }) @@ -2621,7 +2631,7 @@ impl Service { let scheduler = &mut locked.scheduler; // Right now we only perform the operation on a single node without parallelization // TODO fan out the operation to multiple nodes for better performance - let node_id = scheduler.schedule_shard(&[], &ScheduleContext::default())?; + let node_id = scheduler.any_available_node()?; let node = locked .nodes .get(&node_id) @@ -2807,7 +2817,7 @@ impl Service { // Pick an arbitrary node to use for remote deletions (does not have to be where the tenant // was attached, just has to be able to see the S3 content) - let node_id = scheduler.schedule_shard(&[], &ScheduleContext::default())?; + let node_id = scheduler.any_available_node()?; let node = nodes 
.get(&node_id) .expect("Pageservers may not be deleted while lock is active"); @@ -3498,34 +3508,66 @@ impl Service { /// When you need to send an HTTP request to the pageserver that holds shard0 of a tenant, this /// function looks up and returns node. If the tenant isn't found, returns Err(ApiError::NotFound) - pub(crate) fn tenant_shard0_node( + pub(crate) async fn tenant_shard0_node( &self, tenant_id: TenantId, ) -> Result<(Node, TenantShardId), ApiError> { - let locked = self.inner.read().unwrap(); - let Some((tenant_shard_id, shard)) = locked - .tenants - .range(TenantShardId::tenant_range(tenant_id)) - .next() + // Look up in-memory state and maybe use the node from there. + { + let locked = self.inner.read().unwrap(); + let Some((tenant_shard_id, shard)) = locked + .tenants + .range(TenantShardId::tenant_range(tenant_id)) + .next() + else { + return Err(ApiError::NotFound( + anyhow::anyhow!("Tenant {tenant_id} not found").into(), + )); + }; + + let Some(intent_node_id) = shard.intent.get_attached() else { + tracing::warn!( + tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), + "Shard not scheduled (policy {:?}), cannot generate pass-through URL", + shard.policy + ); + return Err(ApiError::Conflict( + "Cannot call timeline API on non-attached tenant".to_string(), + )); + }; + + if shard.reconciler.is_none() { + // Optimization: while no reconcile is in flight, we may trust our in-memory state + // to tell us which pageserver to use. Otherwise we will fall through and hit the database + let Some(node) = locked.nodes.get(intent_node_id) else { + // This should never happen + return Err(ApiError::InternalServerError(anyhow::anyhow!( + "Shard refers to nonexistent node" + ))); + }; + return Ok((node.clone(), *tenant_shard_id)); + } + }; + + // Look up the latest attached pageserver location from the database + // generation state: this will reflect the progress of any ongoing migration. 
+ // Note that it is not guaranteed to _stay_ here, our caller must still handle + // the case where they call through to the pageserver and get a 404. + let db_result = self.persistence.tenant_generations(tenant_id).await?; + let Some(ShardGenerationState { + tenant_shard_id, + generation: _, + generation_pageserver: Some(node_id), + }) = db_result.first() else { - return Err(ApiError::NotFound( - anyhow::anyhow!("Tenant {tenant_id} not found").into(), + // This can happen if we raced with a tenant deletion or a shard split. On a retry + // the caller will either succeed (shard split case), get a proper 404 (deletion case), + // or a conflict response (case where tenant was detached in background) + return Err(ApiError::ResourceUnavailable( + "Shard {} not found in database, or is not attached".into(), )); }; - - // TODO: should use the ID last published to compute_hook, rather than the intent: the intent might - // point to somewhere we haven't attached yet. - let Some(node_id) = shard.intent.get_attached() else { - tracing::warn!( - tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), - "Shard not scheduled (policy {:?}), cannot generate pass-through URL", - shard.policy - ); - return Err(ApiError::Conflict( - "Cannot call timeline API on non-attached tenant".to_string(), - )); - }; - + let locked = self.inner.read().unwrap(); let Some(node) = locked.nodes.get(node_id) else { // This should never happen return Err(ApiError::InternalServerError(anyhow::anyhow!( @@ -4471,7 +4513,7 @@ impl Service { let az_id = locked .nodes .get(node_id) - .map(|n| n.get_availability_zone_id().to_string())?; + .map(|n| n.get_availability_zone_id().clone())?; Some((*tid, az_id)) }) diff --git a/storage_controller/src/tenant_shard.rs b/storage_controller/src/tenant_shard.rs index cdb0633e2b..afc89eae00 100644 --- a/storage_controller/src/tenant_shard.rs +++ b/storage_controller/src/tenant_shard.rs @@ -8,11 +8,14 @@ use crate::{ metrics::{self, 
ReconcileCompleteLabelGroup, ReconcileOutcome}, persistence::TenantShardPersistence, reconciler::{ReconcileUnits, ReconcilerConfig}, - scheduler::{AffinityScore, MaySchedule, RefCountUpdate, ScheduleContext}, + scheduler::{ + AffinityScore, AttachedShardTag, MaySchedule, RefCountUpdate, ScheduleContext, + SecondaryShardTag, + }, service::ReconcileResultRequest, }; use pageserver_api::controller_api::{ - NodeSchedulingPolicy, PlacementPolicy, ShardSchedulingPolicy, + AvailabilityZone, NodeSchedulingPolicy, PlacementPolicy, ShardSchedulingPolicy, }; use pageserver_api::{ models::{LocationConfig, LocationConfigMode, TenantConfig}, @@ -143,7 +146,7 @@ pub(crate) struct TenantShard { // We should attempt to schedule this shard in the provided AZ to // decrease chances of cross-AZ compute. - preferred_az_id: Option, + preferred_az_id: Option, } #[derive(Default, Clone, Debug, Serialize)] @@ -335,19 +338,19 @@ pub(crate) enum ReconcileWaitError { Failed(TenantShardId, Arc), } -#[derive(Eq, PartialEq, Debug)] +#[derive(Eq, PartialEq, Debug, Clone)] pub(crate) struct ReplaceSecondary { old_node_id: NodeId, new_node_id: NodeId, } -#[derive(Eq, PartialEq, Debug)] +#[derive(Eq, PartialEq, Debug, Clone)] pub(crate) struct MigrateAttachment { pub(crate) old_attached_node_id: NodeId, pub(crate) new_attached_node_id: NodeId, } -#[derive(Eq, PartialEq, Debug)] +#[derive(Eq, PartialEq, Debug, Clone)] pub(crate) enum ScheduleOptimizationAction { // Replace one of our secondary locations with a different node ReplaceSecondary(ReplaceSecondary), @@ -355,7 +358,7 @@ pub(crate) enum ScheduleOptimizationAction { MigrateAttachment(MigrateAttachment), } -#[derive(Eq, PartialEq, Debug)] +#[derive(Eq, PartialEq, Debug, Clone)] pub(crate) struct ScheduleOptimization { // What was the reconcile sequence when we generated this optimization? 
The optimization // should only be applied if the shard's sequence is still at this value, in case other changes @@ -537,13 +540,22 @@ impl TenantShard { Ok((true, promote_secondary)) } else { // Pick a fresh node: either we had no secondaries or none were schedulable - let node_id = scheduler.schedule_shard(&self.intent.secondary, context)?; + let node_id = scheduler.schedule_shard::( + &self.intent.secondary, + &self.preferred_az_id, + context, + )?; tracing::debug!("Selected {} as attached", node_id); self.intent.set_attached(scheduler, Some(node_id)); Ok((true, node_id)) } } + #[instrument(skip_all, fields( + tenant_id=%self.tenant_shard_id.tenant_id, + shard_id=%self.tenant_shard_id.shard_slug(), + sequence=%self.sequence + ))] pub(crate) fn schedule( &mut self, scheduler: &mut Scheduler, @@ -613,7 +625,11 @@ impl TenantShard { let mut used_pageservers = vec![attached_node_id]; while self.intent.secondary.len() < secondary_count { - let node_id = scheduler.schedule_shard(&used_pageservers, context)?; + let node_id = scheduler.schedule_shard::( + &used_pageservers, + &self.preferred_az_id, + context, + )?; self.intent.push_secondary(scheduler, node_id); used_pageservers.push(node_id); modified = true; @@ -626,7 +642,11 @@ impl TenantShard { modified = true; } else if self.intent.secondary.is_empty() { // Populate secondary by scheduling a fresh node - let node_id = scheduler.schedule_shard(&[], context)?; + let node_id = scheduler.schedule_shard::( + &[], + &self.preferred_az_id, + context, + )?; self.intent.push_secondary(scheduler, node_id); modified = true; } @@ -803,9 +823,11 @@ impl TenantShard { // Let the scheduler suggest a node, where it would put us if we were scheduling afresh // This implicitly limits the choice to nodes that are available, and prefers nodes // with lower utilization. 
- let Ok(candidate_node) = - scheduler.schedule_shard(&self.intent.all_pageservers(), schedule_context) - else { + let Ok(candidate_node) = scheduler.schedule_shard::( + &self.intent.all_pageservers(), + &self.preferred_az_id, + schedule_context, + ) else { // A scheduling error means we have no possible candidate replacements continue; }; @@ -1302,7 +1324,7 @@ impl TenantShard { pending_compute_notification: false, delayed_reconcile: false, scheduling_policy: serde_json::from_str(&tsp.scheduling_policy).unwrap(), - preferred_az_id: tsp.preferred_az_id, + preferred_az_id: tsp.preferred_az_id.map(AvailabilityZone), }) } @@ -1318,25 +1340,28 @@ impl TenantShard { config: serde_json::to_string(&self.config).unwrap(), splitting: SplitState::default(), scheduling_policy: serde_json::to_string(&self.scheduling_policy).unwrap(), - preferred_az_id: self.preferred_az_id.clone(), + preferred_az_id: self.preferred_az_id.as_ref().map(|az| az.0.clone()), } } - pub(crate) fn preferred_az(&self) -> Option<&str> { - self.preferred_az_id.as_deref() + pub(crate) fn preferred_az(&self) -> Option<&AvailabilityZone> { + self.preferred_az_id.as_ref() } - pub(crate) fn set_preferred_az(&mut self, preferred_az_id: String) { + pub(crate) fn set_preferred_az(&mut self, preferred_az_id: AvailabilityZone) { self.preferred_az_id = Some(preferred_az_id); } } #[cfg(test)] pub(crate) mod tests { + use std::{cell::RefCell, rc::Rc}; + use pageserver_api::{ controller_api::NodeAvailability, shard::{ShardCount, ShardNumber}, }; + use rand::{rngs::StdRng, SeedableRng}; use utils::id::TenantId; use crate::scheduler::test_utils::make_test_nodes; @@ -1365,7 +1390,11 @@ pub(crate) mod tests { ) } - fn make_test_tenant(policy: PlacementPolicy, shard_count: ShardCount) -> Vec { + fn make_test_tenant( + policy: PlacementPolicy, + shard_count: ShardCount, + preferred_az: Option, + ) -> Vec { let tenant_id = TenantId::generate(); (0..shard_count.count()) @@ -1377,7 +1406,7 @@ pub(crate) mod tests { 
shard_number, shard_count, }; - TenantShard::new( + let mut ts = TenantShard::new( tenant_shard_id, ShardIdentity::new( shard_number, @@ -1386,7 +1415,13 @@ pub(crate) mod tests { ) .unwrap(), policy.clone(), - ) + ); + + if let Some(az) = &preferred_az { + ts.set_preferred_az(az.clone()); + } + + ts }) .collect() } @@ -1397,7 +1432,7 @@ pub(crate) mod tests { fn tenant_ha_scheduling() -> anyhow::Result<()> { // Start with three nodes. Our tenant will only use two. The third one is // expected to remain unused. - let mut nodes = make_test_nodes(3); + let mut nodes = make_test_nodes(3, &[]); let mut scheduler = Scheduler::new(nodes.values()); let mut context = ScheduleContext::default(); @@ -1449,7 +1484,7 @@ pub(crate) mod tests { #[test] fn intent_from_observed() -> anyhow::Result<()> { - let nodes = make_test_nodes(3); + let nodes = make_test_nodes(3, &[]); let mut scheduler = Scheduler::new(nodes.values()); let mut tenant_shard = make_test_tenant_shard(PlacementPolicy::Attached(1)); @@ -1499,7 +1534,7 @@ pub(crate) mod tests { #[test] fn scheduling_mode() -> anyhow::Result<()> { - let nodes = make_test_nodes(3); + let nodes = make_test_nodes(3, &[]); let mut scheduler = Scheduler::new(nodes.values()); let mut tenant_shard = make_test_tenant_shard(PlacementPolicy::Attached(1)); @@ -1524,7 +1559,7 @@ pub(crate) mod tests { #[test] fn optimize_attachment() -> anyhow::Result<()> { - let nodes = make_test_nodes(3); + let nodes = make_test_nodes(3, &[]); let mut scheduler = Scheduler::new(nodes.values()); let mut shard_a = make_test_tenant_shard(PlacementPolicy::Attached(1)); @@ -1591,7 +1626,7 @@ pub(crate) mod tests { #[test] fn optimize_secondary() -> anyhow::Result<()> { - let nodes = make_test_nodes(4); + let nodes = make_test_nodes(4, &[]); let mut scheduler = Scheduler::new(nodes.values()); let mut shard_a = make_test_tenant_shard(PlacementPolicy::Attached(1)); @@ -1637,12 +1672,14 @@ pub(crate) mod tests { // Optimize til quiescent: this emulates what 
Service::optimize_all does, when // called repeatedly in the background. + // Returns the applied optimizations fn optimize_til_idle( nodes: &HashMap, scheduler: &mut Scheduler, shards: &mut [TenantShard], - ) { + ) -> Vec { let mut loop_n = 0; + let mut optimizations = Vec::default(); loop { let mut schedule_context = ScheduleContext::default(); let mut any_changed = false; @@ -1657,6 +1694,7 @@ pub(crate) mod tests { for shard in shards.iter_mut() { let optimization = shard.optimize_attachment(nodes, &schedule_context); if let Some(optimization) = optimization { + optimizations.push(optimization.clone()); shard.apply_optimization(scheduler, optimization); any_changed = true; break; @@ -1664,6 +1702,7 @@ pub(crate) mod tests { let optimization = shard.optimize_secondary(scheduler, &schedule_context); if let Some(optimization) = optimization { + optimizations.push(optimization.clone()); shard.apply_optimization(scheduler, optimization); any_changed = true; break; @@ -1678,20 +1717,22 @@ pub(crate) mod tests { loop_n += 1; assert!(loop_n < 1000); } + + optimizations } /// Test the balancing behavior of shard scheduling: that it achieves a balance, and /// that it converges. #[test] fn optimize_add_nodes() -> anyhow::Result<()> { - let nodes = make_test_nodes(4); + let nodes = make_test_nodes(4, &[]); // Only show the scheduler a couple of nodes let mut scheduler = Scheduler::new([].iter()); scheduler.node_upsert(nodes.get(&NodeId(1)).unwrap()); scheduler.node_upsert(nodes.get(&NodeId(2)).unwrap()); - let mut shards = make_test_tenant(PlacementPolicy::Attached(1), ShardCount::new(4)); + let mut shards = make_test_tenant(PlacementPolicy::Attached(1), ShardCount::new(4), None); let mut schedule_context = ScheduleContext::default(); for shard in &mut shards { assert!(shard @@ -1730,4 +1771,191 @@ pub(crate) mod tests { Ok(()) } + + /// Test that initial shard scheduling is optimal. By optimal we mean + /// that the optimizer cannot find a way to improve it. 
+ /// + /// This test is an example of the scheduling issue described in + /// https://github.com/neondatabase/neon/issues/8969 + #[test] + fn initial_scheduling_is_optimal() -> anyhow::Result<()> { + use itertools::Itertools; + + let nodes = make_test_nodes(2, &[]); + + let mut scheduler = Scheduler::new([].iter()); + scheduler.node_upsert(nodes.get(&NodeId(1)).unwrap()); + scheduler.node_upsert(nodes.get(&NodeId(2)).unwrap()); + + let mut a = make_test_tenant(PlacementPolicy::Attached(1), ShardCount::new(4), None); + let a_context = Rc::new(RefCell::new(ScheduleContext::default())); + + let mut b = make_test_tenant(PlacementPolicy::Attached(1), ShardCount::new(4), None); + let b_context = Rc::new(RefCell::new(ScheduleContext::default())); + + let a_shards_with_context = a.iter_mut().map(|shard| (shard, a_context.clone())); + let b_shards_with_context = b.iter_mut().map(|shard| (shard, b_context.clone())); + + let schedule_order = a_shards_with_context.interleave(b_shards_with_context); + + for (shard, context) in schedule_order { + let context = &mut *context.borrow_mut(); + shard.schedule(&mut scheduler, context).unwrap(); + } + + let applied_to_a = optimize_til_idle(&nodes, &mut scheduler, &mut a); + assert_eq!(applied_to_a, vec![]); + + let applied_to_b = optimize_til_idle(&nodes, &mut scheduler, &mut b); + assert_eq!(applied_to_b, vec![]); + + for shard in a.iter_mut().chain(b.iter_mut()) { + shard.intent.clear(&mut scheduler); + } + + Ok(()) + } + + #[test] + fn random_az_shard_scheduling() -> anyhow::Result<()> { + use rand::seq::SliceRandom; + + for seed in 0..50 { + eprintln!("Running test with seed {seed}"); + let mut rng = StdRng::seed_from_u64(seed); + + let az_a_tag = AvailabilityZone("az-a".to_string()); + let az_b_tag = AvailabilityZone("az-b".to_string()); + let azs = [az_a_tag, az_b_tag]; + let nodes = make_test_nodes(4, &azs); + let mut shards_per_az: HashMap = HashMap::new(); + + let mut scheduler = Scheduler::new([].iter()); + for node in 
nodes.values() { + scheduler.node_upsert(node); + } + + let mut shards = Vec::default(); + let mut contexts = Vec::default(); + let mut az_picker = azs.iter().cycle().cloned(); + for i in 0..100 { + let az = az_picker.next().unwrap(); + let shard_count = i % 4 + 1; + *shards_per_az.entry(az.clone()).or_default() += shard_count; + + let tenant_shards = make_test_tenant( + PlacementPolicy::Attached(1), + ShardCount::new(shard_count.try_into().unwrap()), + Some(az), + ); + let context = Rc::new(RefCell::new(ScheduleContext::default())); + + contexts.push(context.clone()); + let with_ctx = tenant_shards + .into_iter() + .map(|shard| (shard, context.clone())); + for shard_with_ctx in with_ctx { + shards.push(shard_with_ctx); + } + } + + shards.shuffle(&mut rng); + + #[derive(Default, Debug)] + struct NodeStats { + attachments: u32, + secondaries: u32, + } + + let mut node_stats: HashMap = HashMap::default(); + let mut attachments_in_wrong_az = 0; + let mut secondaries_in_wrong_az = 0; + + for (shard, context) in &mut shards { + let context = &mut *context.borrow_mut(); + shard.schedule(&mut scheduler, context).unwrap(); + + let attached_node = shard.intent.get_attached().unwrap(); + let stats = node_stats.entry(attached_node).or_default(); + stats.attachments += 1; + + let secondary_node = *shard.intent.get_secondary().first().unwrap(); + let stats = node_stats.entry(secondary_node).or_default(); + stats.secondaries += 1; + + let attached_node_az = nodes + .get(&attached_node) + .unwrap() + .get_availability_zone_id(); + let secondary_node_az = nodes + .get(&secondary_node) + .unwrap() + .get_availability_zone_id(); + let preferred_az = shard.preferred_az().unwrap(); + + if attached_node_az != preferred_az { + eprintln!( + "{} attachment was scheduled in AZ {} but preferred AZ {}", + shard.tenant_shard_id, attached_node_az, preferred_az + ); + attachments_in_wrong_az += 1; + } + + if secondary_node_az == preferred_az { + eprintln!( + "{} secondary was scheduled in AZ {} 
which matches preference", + shard.tenant_shard_id, attached_node_az + ); + secondaries_in_wrong_az += 1; + } + } + + let mut violations = Vec::default(); + + if attachments_in_wrong_az > 0 { + violations.push(format!( + "{} attachments scheduled to the incorrect AZ", + attachments_in_wrong_az + )); + } + + if secondaries_in_wrong_az > 0 { + violations.push(format!( + "{} secondaries scheduled to the incorrect AZ", + secondaries_in_wrong_az + )); + } + + eprintln!( + "attachments_in_wrong_az={} secondaries_in_wrong_az={}", + attachments_in_wrong_az, secondaries_in_wrong_az + ); + + for (node_id, stats) in &node_stats { + let node_az = nodes.get(node_id).unwrap().get_availability_zone_id(); + let ideal_attachment_load = shards_per_az.get(node_az).unwrap() / 2; + let allowed_attachment_load = + (ideal_attachment_load - 1)..(ideal_attachment_load + 2); + + if !allowed_attachment_load.contains(&stats.attachments) { + violations.push(format!( + "Found {} attachments on node {}, but expected {}", + stats.attachments, node_id, ideal_attachment_load + )); + } + + eprintln!( + "{}: attachments={} secondaries={} ideal_attachment_load={}", + node_id, stats.attachments, stats.secondaries, ideal_attachment_load + ); + } + + assert!(violations.is_empty(), "{violations:?}"); + + for (mut shard, _ctx) in shards { + shard.intent.clear(&mut scheduler); + } + } + Ok(()) + } } diff --git a/storage_scrubber/Cargo.toml b/storage_scrubber/Cargo.toml index f9987662b9..a1b5b0b12f 100644 --- a/storage_scrubber/Cargo.toml +++ b/storage_scrubber/Cargo.toml @@ -8,7 +8,6 @@ license.workspace = true aws-sdk-s3.workspace = true either.workspace = true anyhow.workspace = true -git-version.workspace = true hex.workspace = true humantime.workspace = true serde.workspace = true diff --git a/storage_scrubber/src/checks.rs b/storage_scrubber/src/checks.rs index 15dfb101b5..525f412b56 100644 --- a/storage_scrubber/src/checks.rs +++ b/storage_scrubber/src/checks.rs @@ -1,12 +1,12 @@ -use 
std::collections::{BTreeSet, HashMap, HashSet}; +use std::collections::{HashMap, HashSet}; -use anyhow::Context; use itertools::Itertools; +use pageserver::tenant::checks::check_valid_layermap; use pageserver::tenant::layer_map::LayerMap; use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata; use pageserver_api::shard::ShardIndex; use tokio_util::sync::CancellationToken; -use tracing::{error, info, warn}; +use tracing::{info, warn}; use utils::generation::Generation; use utils::id::TimelineId; @@ -28,9 +28,8 @@ pub(crate) struct TimelineAnalysis { /// yet. pub(crate) warnings: Vec, - /// Keys not referenced in metadata: candidates for removal, but NOT NECESSARILY: beware - /// of races between reading the metadata and reading the objects. - pub(crate) garbage_keys: Vec, + /// Objects whose keys were not recognized at all, i.e. not layer files, not indices, and not initdb archive. + pub(crate) unknown_keys: Vec, } impl TimelineAnalysis { @@ -38,7 +37,7 @@ impl TimelineAnalysis { Self { errors: Vec::new(), warnings: Vec::new(), - garbage_keys: Vec::new(), + unknown_keys: Vec::new(), } } @@ -48,56 +47,6 @@ impl TimelineAnalysis { } } -/// Checks whether a layer map is valid (i.e., is a valid result of the current compaction algorithm if nothing goes wrong). -/// The function checks if we can split the LSN range of a delta layer only at the LSNs of the delta layers. For example, -/// -/// ```plain -/// | | | | -/// | 1 | | 2 | | 3 | -/// | | | | | | -/// ``` -/// -/// This is not a valid layer map because the LSN range of layer 1 intersects with the LSN range of layer 2. 1 and 2 should have -/// the same LSN range. -/// -/// The exception is that when layer 2 only contains a single key, it could be split over the LSN range. For example, -/// -/// ```plain -/// | | | 2 | | | -/// | 1 | |-------| | 3 | -/// | | | 4 | | | -/// -/// If layer 2 and 4 contain the same single key, this is also a valid layer map. 
-fn check_valid_layermap(metadata: &HashMap) -> Option { - let mut lsn_split_point = BTreeSet::new(); // TODO: use a better data structure (range tree / range set?) - let mut all_delta_layers = Vec::new(); - for (name, _) in metadata.iter() { - if let LayerName::Delta(layer) = name { - if layer.key_range.start.next() != layer.key_range.end { - all_delta_layers.push(layer.clone()); - } - } - } - for layer in &all_delta_layers { - let lsn_range = &layer.lsn_range; - lsn_split_point.insert(lsn_range.start); - lsn_split_point.insert(lsn_range.end); - } - for layer in &all_delta_layers { - let lsn_range = layer.lsn_range.clone(); - let intersects = lsn_split_point.range(lsn_range).collect_vec(); - if intersects.len() > 1 { - let err = format!( - "layer violates the layer map LSN split assumption: layer {} intersects with LSN [{}]", - layer, - intersects.into_iter().map(|lsn| lsn.to_string()).join(", ") - ); - return Some(err); - } - } - None -} - pub(crate) async fn branch_cleanup_and_check_errors( remote_client: &GenericRemoteStorage, id: &TenantShardTimelineId, @@ -108,7 +57,7 @@ pub(crate) async fn branch_cleanup_and_check_errors( ) -> TimelineAnalysis { let mut result = TimelineAnalysis::new(); - info!("Checking timeline {id}"); + info!("Checking timeline"); if let Some(s3_active_branch) = s3_active_branch { info!( @@ -129,7 +78,7 @@ pub(crate) async fn branch_cleanup_and_check_errors( match s3_data { Some(s3_data) => { result - .garbage_keys + .unknown_keys .extend(s3_data.unknown_keys.into_iter().map(|k| k.key.to_string())); match s3_data.blob_data { @@ -177,7 +126,8 @@ pub(crate) async fn branch_cleanup_and_check_errors( } } - if let Some(err) = check_valid_layermap(&index_part.layer_metadata) { + let layer_names = index_part.layer_metadata.keys().cloned().collect_vec(); + if let Some(err) = check_valid_layermap(&layer_names) { result.errors.push(format!( "index_part.json contains invalid layer map structure: {err}" )); @@ -252,10 +202,10 @@ pub(crate) async fn 
branch_cleanup_and_check_errors( warn!("Timeline metadata warnings: {0:?}", result.warnings); } - if !result.garbage_keys.is_empty() { - error!( - "The following keys should be removed from S3: {0:?}", - result.garbage_keys + if !result.unknown_keys.is_empty() { + warn!( + "The following keys are not recognized: {0:?}", + result.unknown_keys ) } @@ -342,10 +292,10 @@ impl TenantObjectListing { pub(crate) struct RemoteTimelineBlobData { pub(crate) blob_data: BlobDataParseResult, - // Index objects that were not used when loading `blob_data`, e.g. those from old generations + /// Index objects that were not used when loading `blob_data`, e.g. those from old generations pub(crate) unused_index_keys: Vec, - // Objects whose keys were not recognized at all, i.e. not layer files, not indices + /// Objects whose keys were not recognized at all, i.e. not layer files, not indices pub(crate) unknown_keys: Vec, } @@ -377,11 +327,54 @@ pub(crate) fn parse_layer_object_name(name: &str) -> Result<(LayerName, Generati } } +/// Note (): +/// Since we do not gurantee the order of the listing, we could list layer keys right before +/// pageserver `RemoteTimelineClient` deletes the layer files and then the index. +/// In the rare case, this would give back a transient error where the index key is missing. +/// +/// To avoid generating false positive, we try streaming the listing for a second time. pub(crate) async fn list_timeline_blobs( remote_client: &GenericRemoteStorage, id: TenantShardTimelineId, root_target: &RootTarget, ) -> anyhow::Result { + let res = list_timeline_blobs_impl(remote_client, id, root_target).await?; + match res { + ListTimelineBlobsResult::Ready(data) => Ok(data), + ListTimelineBlobsResult::MissingIndexPart(_) => { + // Retry if index is missing. + let data = list_timeline_blobs_impl(remote_client, id, root_target) + .await? + .into_data(); + Ok(data) + } + } +} + +enum ListTimelineBlobsResult { + /// Blob data is ready to be intepreted. 
+ Ready(RemoteTimelineBlobData), + /// List timeline blobs has layer files but is missing [`IndexPart`]. + MissingIndexPart(RemoteTimelineBlobData), +} + +impl ListTimelineBlobsResult { + /// Get the inner blob data regardless the status. + pub fn into_data(self) -> RemoteTimelineBlobData { + match self { + ListTimelineBlobsResult::Ready(data) => data, + ListTimelineBlobsResult::MissingIndexPart(data) => data, + } + } +} + +/// Returns [`ListTimelineBlobsResult::MissingIndexPart`] if blob data has layer files +/// but is missing [`IndexPart`], otherwise returns [`ListTimelineBlobsResult::Ready`]. +async fn list_timeline_blobs_impl( + remote_client: &GenericRemoteStorage, + id: TenantShardTimelineId, + root_target: &RootTarget, +) -> anyhow::Result { let mut s3_layers = HashSet::new(); let mut errors = Vec::new(); @@ -423,30 +416,28 @@ pub(crate) async fn list_timeline_blobs( s3_layers.insert((new_layer, gen)); } Err(e) => { - tracing::info!("Error parsing key {maybe_layer_name}"); - errors.push( - format!("S3 list response got an object with key {key} that is not a layer name: {e}"), - ); + tracing::info!("Error parsing {maybe_layer_name} as layer name: {e}"); unknown_keys.push(obj); } }, None => { - tracing::warn!("Unknown key {key}"); - errors.push(format!("S3 list response got an object with odd key {key}")); + tracing::info!("S3 listed an unknown key: {key}"); unknown_keys.push(obj); } } } - if index_part_keys.is_empty() && s3_layers.is_empty() && initdb_archive { - tracing::debug!( - "Timeline is empty apart from initdb archive: expected post-deletion state." 
- ); - return Ok(RemoteTimelineBlobData { + if index_part_keys.is_empty() && s3_layers.is_empty() { + tracing::debug!("Timeline is empty: expected post-deletion state."); + if initdb_archive { + tracing::info!("Timeline is post deletion but initdb archive is still present."); + } + + return Ok(ListTimelineBlobsResult::Ready(RemoteTimelineBlobData { blob_data: BlobDataParseResult::Relic, unused_index_keys: index_part_keys, - unknown_keys: Vec::new(), - }); + unknown_keys, + })); } // Choose the index_part with the highest generation @@ -472,19 +463,43 @@ pub(crate) async fn list_timeline_blobs( match index_part_object.as_ref() { Some(selected) => index_part_keys.retain(|k| k != selected), None => { - errors.push("S3 list response got no index_part.json file".to_string()); + // It is possible that the branch gets deleted after we got some layer files listed + // and we no longer have the index file in the listing. + errors.push( + "S3 list response got no index_part.json file but still has layer files" + .to_string(), + ); + return Ok(ListTimelineBlobsResult::MissingIndexPart( + RemoteTimelineBlobData { + blob_data: BlobDataParseResult::Incorrect { errors, s3_layers }, + unused_index_keys: index_part_keys, + unknown_keys, + }, + )); } } if let Some(index_part_object_key) = index_part_object.as_ref() { let index_part_bytes = - download_object_with_retries(remote_client, &index_part_object_key.key) - .await - .context("index_part.json download")?; + match download_object_with_retries(remote_client, &index_part_object_key.key).await { + Ok(index_part_bytes) => index_part_bytes, + Err(e) => { + // It is possible that the branch gets deleted in-between we list the objects + // and we download the index part file. 
+ errors.push(format!("failed to download index_part.json: {e}")); + return Ok(ListTimelineBlobsResult::MissingIndexPart( + RemoteTimelineBlobData { + blob_data: BlobDataParseResult::Incorrect { errors, s3_layers }, + unused_index_keys: index_part_keys, + unknown_keys, + }, + )); + } + }; match serde_json::from_slice(&index_part_bytes) { Ok(index_part) => { - return Ok(RemoteTimelineBlobData { + return Ok(ListTimelineBlobsResult::Ready(RemoteTimelineBlobData { blob_data: BlobDataParseResult::Parsed { index_part: Box::new(index_part), index_part_generation, @@ -492,7 +507,7 @@ pub(crate) async fn list_timeline_blobs( }, unused_index_keys: index_part_keys, unknown_keys, - }) + })) } Err(index_parse_error) => errors.push(format!( "index_part.json body parsing error: {index_parse_error}" @@ -506,9 +521,9 @@ pub(crate) async fn list_timeline_blobs( ); } - Ok(RemoteTimelineBlobData { + Ok(ListTimelineBlobsResult::Ready(RemoteTimelineBlobData { blob_data: BlobDataParseResult::Incorrect { errors, s3_layers }, unused_index_keys: index_part_keys, unknown_keys, - }) + })) } diff --git a/storage_scrubber/src/main.rs b/storage_scrubber/src/main.rs index ee133e2e58..ee816534c6 100644 --- a/storage_scrubber/src/main.rs +++ b/storage_scrubber/src/main.rs @@ -41,6 +41,10 @@ struct Cli { #[arg(long)] /// JWT token for authenticating with storage controller. Requires scope 'scrubber' or 'admin'. controller_jwt: Option, + + /// If set to true, the scrubber will exit with error code on fatal error. 
+ #[arg(long, default_value_t = false)] + exit_code: bool, } #[derive(Subcommand, Debug)] @@ -203,6 +207,7 @@ async fn main() -> anyhow::Result<()> { tenant_ids, json, post_to_storcon, + cli.exit_code, ) .await } @@ -269,6 +274,7 @@ async fn main() -> anyhow::Result<()> { gc_min_age, gc_mode, post_to_storcon, + cli.exit_code, ) .await } @@ -284,6 +290,7 @@ pub async fn run_cron_job( gc_min_age: humantime::Duration, gc_mode: GcMode, post_to_storcon: bool, + exit_code: bool, ) -> anyhow::Result<()> { tracing::info!(%gc_min_age, %gc_mode, "Running pageserver-physical-gc"); pageserver_physical_gc_cmd( @@ -301,6 +308,7 @@ pub async fn run_cron_job( Vec::new(), true, post_to_storcon, + exit_code, ) .await?; @@ -349,6 +357,7 @@ pub async fn scan_pageserver_metadata_cmd( tenant_shard_ids: Vec, json: bool, post_to_storcon: bool, + exit_code: bool, ) -> anyhow::Result<()> { if controller_client.is_none() && post_to_storcon { return Err(anyhow!("Posting pageserver scan health status to storage controller requires `--controller-api` and `--controller-jwt` to run")); @@ -380,6 +389,9 @@ pub async fn scan_pageserver_metadata_cmd( if summary.is_fatal() { tracing::error!("Fatal scrub errors detected"); + if exit_code { + std::process::exit(1); + } } else if summary.is_empty() { // Strictly speaking an empty bucket is a valid bucket, but if someone ran the // scrubber they were likely expecting to scan something, and if we see no timelines @@ -391,6 +403,9 @@ pub async fn scan_pageserver_metadata_cmd( .prefix_in_bucket .unwrap_or("".to_string()) ); + if exit_code { + std::process::exit(1); + } } Ok(()) diff --git a/storage_scrubber/src/scan_pageserver_metadata.rs b/storage_scrubber/src/scan_pageserver_metadata.rs index 151ef27672..c1ea589f7f 100644 --- a/storage_scrubber/src/scan_pageserver_metadata.rs +++ b/storage_scrubber/src/scan_pageserver_metadata.rs @@ -12,6 +12,7 @@ use pageserver_api::controller_api::MetadataHealthUpdateRequest; use pageserver_api::shard::TenantShardId; 
use remote_storage::GenericRemoteStorage; use serde::Serialize; +use tracing::{info_span, Instrument}; use utils::id::TenantId; use utils::shard::ShardCount; @@ -169,45 +170,54 @@ pub async fn scan_pageserver_metadata( let mut timeline_ids = HashSet::new(); let mut timeline_generations = HashMap::new(); for (ttid, data) in timelines { - if ttid.tenant_shard_id.shard_count == highest_shard_count { - // Only analyze `TenantShardId`s with highest shard count. + async { + if ttid.tenant_shard_id.shard_count == highest_shard_count { + // Only analyze `TenantShardId`s with highest shard count. - // Stash the generation of each timeline, for later use identifying orphan layers - if let BlobDataParseResult::Parsed { - index_part, - index_part_generation, - s3_layers: _s3_layers, - } = &data.blob_data - { - if index_part.deleted_at.is_some() { - // skip deleted timeline. - tracing::info!("Skip analysis of {} b/c timeline is already deleted", ttid); - continue; + // Stash the generation of each timeline, for later use identifying orphan layers + if let BlobDataParseResult::Parsed { + index_part, + index_part_generation, + s3_layers: _s3_layers, + } = &data.blob_data + { + if index_part.deleted_at.is_some() { + // skip deleted timeline. + tracing::info!( + "Skip analysis of {} b/c timeline is already deleted", + ttid + ); + return; + } + timeline_generations.insert(ttid, *index_part_generation); } - timeline_generations.insert(ttid, *index_part_generation); + + // Apply checks to this timeline shard's metadata, and in the process update `tenant_objects` + // reference counts for layers across the tenant. 
+ let analysis = branch_cleanup_and_check_errors( + remote_client, + &ttid, + &mut tenant_objects, + None, + None, + Some(data), + ) + .await; + summary.update_analysis(&ttid, &analysis); + + timeline_ids.insert(ttid.timeline_id); + } else { + tracing::info!( + "Skip analysis of {} b/c a lower shard count than {}", + ttid, + highest_shard_count.0, + ); } - - // Apply checks to this timeline shard's metadata, and in the process update `tenant_objects` - // reference counts for layers across the tenant. - let analysis = branch_cleanup_and_check_errors( - remote_client, - &ttid, - &mut tenant_objects, - None, - None, - Some(data), - ) - .await; - summary.update_analysis(&ttid, &analysis); - - timeline_ids.insert(ttid.timeline_id); - } else { - tracing::info!( - "Skip analysis of {} b/c a lower shard count than {}", - ttid, - highest_shard_count.0, - ); } + .instrument( + info_span!("analyze-timeline", shard = %ttid.tenant_shard_id.shard_slug(), timeline = %ttid.timeline_id), + ) + .await } summary.timeline_count += timeline_ids.len(); @@ -278,6 +288,7 @@ pub async fn scan_pageserver_metadata( timelines, highest_shard_count, ) + .instrument(info_span!("analyze-tenant", tenant = %prev_tenant_id)) .await; tenant_id = Some(ttid.tenant_shard_id.tenant_id); highest_shard_count = ttid.tenant_shard_id.shard_count; @@ -306,15 +317,18 @@ pub async fn scan_pageserver_metadata( tenant_timeline_results.push((ttid, data)); } + let tenant_id = tenant_id.expect("Must be set if results are present"); + if !tenant_timeline_results.is_empty() { analyze_tenant( &remote_client, - tenant_id.expect("Must be set if results are present"), + tenant_id, &mut summary, tenant_objects, tenant_timeline_results, highest_shard_count, ) + .instrument(info_span!("analyze-tenant", tenant = %tenant_id)) .await; } diff --git a/test_runner/cloud_regress/test_cloud_regress.py b/test_runner/cloud_regress/test_cloud_regress.py new file mode 100644 index 0000000000..de71357232 --- /dev/null +++ 
b/test_runner/cloud_regress/test_cloud_regress.py @@ -0,0 +1,100 @@ +""" +Run the regression tests on the cloud instance of Neon +""" + +from pathlib import Path +from typing import Any + +import psycopg2 +import pytest +from fixtures.log_helper import log +from fixtures.neon_fixtures import RemotePostgres +from fixtures.pg_version import PgVersion + + +@pytest.fixture +def setup(remote_pg: RemotePostgres): + """ + Setup and teardown of the tests + """ + with psycopg2.connect(remote_pg.connstr()) as conn: + with conn.cursor() as cur: + log.info("Creating the extension") + cur.execute("CREATE EXTENSION IF NOT EXISTS regress_so") + conn.commit() + # TODO: Migrate to branches and remove this code + log.info("Looking for subscriptions in the regress database") + cur.execute( + "SELECT subname FROM pg_catalog.pg_subscription WHERE " + "subdbid = (SELECT oid FROM pg_catalog.pg_database WHERE datname='regression');" + ) + if cur.rowcount > 0: + with psycopg2.connect( + dbname="regression", + host=remote_pg.default_options["host"], + user=remote_pg.default_options["user"], + password=remote_pg.default_options["password"], + ) as regress_conn: + with regress_conn.cursor() as regress_cur: + for sub in cur: + regress_cur.execute(f"ALTER SUBSCRIPTION {sub[0]} DISABLE") + regress_cur.execute( + f"ALTER SUBSCRIPTION {sub[0]} SET (slot_name = NONE)" + ) + regress_cur.execute(f"DROP SUBSCRIPTION {sub[0]}") + regress_conn.commit() + + yield + # TODO: Migrate to branches and remove this code + log.info("Looking for extra roles...") + with psycopg2.connect(remote_pg.connstr()) as conn: + with conn.cursor() as cur: + cur.execute( + "SELECT rolname FROM pg_catalog.pg_roles WHERE oid > 16384 AND rolname <> 'neondb_owner'" + ) + roles: list[Any] = [] + for role in cur: + log.info("Role found: %s", role[0]) + roles.append(role[0]) + for role in roles: + cur.execute(f"DROP ROLE {role}") + conn.commit() + + +@pytest.mark.timeout(7200) +@pytest.mark.remote_cluster +def test_cloud_regress( + 
setup, + remote_pg: RemotePostgres, + pg_version: PgVersion, + pg_distrib_dir: Path, + base_dir: Path, + test_output_dir: Path, +): + """ + Run the regression tests + """ + regress_bin = ( + pg_distrib_dir / f"{pg_version.v_prefixed}/lib/postgresql/pgxs/src/test/regress/pg_regress" + ) + test_path = base_dir / f"vendor/postgres-{pg_version.v_prefixed}/src/test/regress" + + env_vars = { + "PGHOST": remote_pg.default_options["host"], + "PGPORT": str( + remote_pg.default_options["port"] if "port" in remote_pg.default_options else 5432 + ), + "PGUSER": remote_pg.default_options["user"], + "PGPASSWORD": remote_pg.default_options["password"], + "PGDATABASE": remote_pg.default_options["dbname"], + } + regress_cmd = [ + str(regress_bin), + f"--inputdir={test_path}", + f"--bindir={pg_distrib_dir}/{pg_version.v_prefixed}/bin", + "--dlpath=/usr/local/lib", + "--max-concurrent-tests=20", + f"--schedule={test_path}/parallel_schedule", + "--max-connections=5", + ] + remote_pg.pg_bin.run(regress_cmd, env=env_vars, cwd=test_output_dir) diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index cda70be8da..005dc6cb0d 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -102,6 +102,11 @@ def histogram(prefix_without_trailing_underscore: str) -> List[str]: return [f"{prefix_without_trailing_underscore}_{x}" for x in ["bucket", "count", "sum"]] +def counter(name: str) -> str: + # the prometheus_client package appends _total to all counters client-side + return f"{name}_total" + + PAGESERVER_PER_TENANT_REMOTE_TIMELINE_CLIENT_METRICS: Tuple[str, ...] = ( "pageserver_remote_timeline_client_calls_started_total", "pageserver_remote_timeline_client_calls_finished_total", @@ -132,9 +137,14 @@ PAGESERVER_GLOBAL_METRICS: Tuple[str, ...] 
= ( *histogram("pageserver_wait_lsn_seconds"), *histogram("pageserver_remote_operation_seconds"), *histogram("pageserver_io_operations_seconds"), + "pageserver_smgr_query_started_global_count_total", "pageserver_tenant_states_count", "pageserver_circuit_breaker_broken_total", "pageserver_circuit_breaker_unbroken_total", + counter("pageserver_tenant_throttling_count_accounted_start_global"), + counter("pageserver_tenant_throttling_count_accounted_finish_global"), + counter("pageserver_tenant_throttling_wait_usecs_sum_global"), + counter("pageserver_tenant_throttling_count_global"), ) PAGESERVER_PER_TENANT_METRICS: Tuple[str, ...] = ( @@ -146,6 +156,7 @@ PAGESERVER_PER_TENANT_METRICS: Tuple[str, ...] = ( "pageserver_smgr_query_seconds_bucket", "pageserver_smgr_query_seconds_count", "pageserver_smgr_query_seconds_sum", + "pageserver_smgr_query_started_count_total", "pageserver_archive_size", "pageserver_pitr_history_size", "pageserver_layer_bytes", @@ -157,6 +168,10 @@ PAGESERVER_PER_TENANT_METRICS: Tuple[str, ...] 
= ( "pageserver_evictions_with_low_residence_duration_total", "pageserver_aux_file_estimated_size", "pageserver_valid_lsn_lease_count", + counter("pageserver_tenant_throttling_count_accounted_start"), + counter("pageserver_tenant_throttling_count_accounted_finish"), + counter("pageserver_tenant_throttling_wait_usecs_sum"), + counter("pageserver_tenant_throttling_count"), *PAGESERVER_PER_TENANT_REMOTE_TIMELINE_CLIENT_METRICS, # "pageserver_directory_entries_count", -- only used if above a certain threshold # "pageserver_broken_tenants_count" -- used only for broken diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index cbbb162cc6..70fe632f49 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -642,9 +642,6 @@ class NeonEnvBuilder: patch_script = "" for ps in self.env.pageservers: patch_script += f"UPDATE nodes SET listen_http_port={ps.service_port.http}, listen_pg_port={ps.service_port.pg} WHERE node_id = '{ps.id}';" - # This is a temporary to get the backward compat test happy - # since the compat snapshot was generated with an older version of neon local - patch_script += f"UPDATE nodes SET availability_zone_id='{ps.az_id}' WHERE node_id = '{ps.id}' AND availability_zone_id IS NULL;" patch_script_path.write_text(patch_script) # Update the config with info about tenants and timelines @@ -849,7 +846,7 @@ class NeonEnvBuilder: for directory_to_clean in reversed(directories_to_clean): if not os.listdir(directory_to_clean): - log.info(f"Removing empty directory {directory_to_clean}") + log.debug(f"Removing empty directory {directory_to_clean}") try: directory_to_clean.rmdir() except Exception as e: @@ -2553,7 +2550,7 @@ class NeonStorageController(MetricsGetter, LogUtils): desired_availability: Optional[PageserverAvailability], desired_scheduling_policy: Optional[PageserverSchedulingPolicy], max_attempts: int, - backoff: int, + backoff: float, ): """ Poll the node status until it 
reaches 'desired_scheduling_policy' and 'desired_availability' @@ -2948,7 +2945,7 @@ class NeonPageserver(PgProtocol, LogUtils): self.id ): self.env.storage_controller.poll_node_status( - self.id, PageserverAvailability.ACTIVE, None, max_attempts=20, backoff=1 + self.id, PageserverAvailability.ACTIVE, None, max_attempts=200, backoff=0.1 ) return self @@ -3863,9 +3860,6 @@ def static_proxy( dbname = vanilla_pg.default_options["dbname"] auth_endpoint = f"postgres://proxy:password@{host}:{port}/{dbname}" - # require password for 'http_auth' user - vanilla_pg.edit_hba([f"host {dbname} http_auth {host} password"]) - # For simplicity, we use the same user for both `--auth-endpoint` and `safe_psql` vanilla_pg.start() vanilla_pg.safe_psql("create user proxy with login superuser password 'password'") @@ -4620,7 +4614,8 @@ class StorageScrubber: "REGION": s3_storage.bucket_region, "BUCKET": s3_storage.bucket_name, "BUCKET_PREFIX": s3_storage.prefix_in_bucket, - "RUST_LOG": "DEBUG", + "RUST_LOG": "INFO", + "PAGESERVER_DISABLE_FILE_LOGGING": "1", } env.update(s3_storage.access_env_vars()) @@ -4640,10 +4635,8 @@ class StorageScrubber: (output_path, stdout, status_code) = subprocess_capture( self.log_dir, args, - echo_stderr=True, - echo_stdout=True, env=env, - check=False, + check=True, capture_stdout=True, timeout=timeout, ) diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index 582f9c0264..0dd557c59f 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -631,7 +631,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter): log.info( f"requesting timeline archival config {config} for tenant {tenant_id} and timeline {timeline_id}" ) - res = self.post( + res = self.put( f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/archival_config", json=config, ) diff --git a/test_runner/fixtures/utils.py b/test_runner/fixtures/utils.py index 80f1c9e4e3..10e8412b19 
100644 --- a/test_runner/fixtures/utils.py +++ b/test_runner/fixtures/utils.py @@ -236,7 +236,7 @@ def get_scale_for_db(size_mb: int) -> int: ATTACHMENT_NAME_REGEX: re.Pattern = re.compile( # type: ignore[type-arg] - r"regression\.diffs|.+\.(?:log|stderr|stdout|filediff|metrics|html|walredo)" + r"regression\.(diffs|out)|.+\.(?:log|stderr|stdout|filediff|metrics|html|walredo)" ) diff --git a/test_runner/regress/test_branch_and_gc.py b/test_runner/regress/test_branch_and_gc.py index f2e3855c12..d7c4cf059a 100644 --- a/test_runner/regress/test_branch_and_gc.py +++ b/test_runner/regress/test_branch_and_gc.py @@ -142,6 +142,7 @@ def test_branch_creation_before_gc(neon_simple_env: NeonEnv): "image_creation_threshold": "1", # set PITR interval to be small, so we can do GC "pitr_interval": "0 s", + "lsn_lease_length": "0s", } ) diff --git a/test_runner/regress/test_branch_behind.py b/test_runner/regress/test_branch_behind.py index 0a5336f5a2..2bf7041cf1 100644 --- a/test_runner/regress/test_branch_behind.py +++ b/test_runner/regress/test_branch_behind.py @@ -11,7 +11,9 @@ from fixtures.utils import print_gc_result, query_scalar # def test_branch_behind(neon_env_builder: NeonEnvBuilder): # Disable pitr, because here we want to test branch creation after GC - env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"}) + env = neon_env_builder.init_start( + initial_tenant_conf={"pitr_interval": "0 sec", "lsn_lease_length": "0s"} + ) error_regexes = [ ".*invalid branch start lsn.*", diff --git a/test_runner/regress/test_branching.py b/test_runner/regress/test_branching.py index 1729e2fc98..3d5c34a595 100644 --- a/test_runner/regress/test_branching.py +++ b/test_runner/regress/test_branching.py @@ -419,7 +419,7 @@ def test_duplicate_creation(neon_env_builder: NeonEnvBuilder): def test_branching_while_stuck_find_gc_cutoffs(neon_env_builder: NeonEnvBuilder): - env = neon_env_builder.init_start() + env = 
neon_env_builder.init_start(initial_tenant_conf={"lsn_lease_length": "0s"}) client = env.pageserver.http_client() diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py index be787e0642..cb34551b53 100644 --- a/test_runner/regress/test_compaction.py +++ b/test_runner/regress/test_compaction.py @@ -240,6 +240,7 @@ def test_uploads_and_deletions( "image_creation_threshold": "1", "image_layer_creation_check_threshold": "0", "compaction_algorithm": json.dumps({"kind": compaction_algorithm.value}), + "lsn_lease_length": "0s", } env = neon_env_builder.init_start(initial_tenant_conf=tenant_conf) diff --git a/test_runner/regress/test_compute_metrics.py b/test_runner/regress/test_compute_metrics.py new file mode 100644 index 0000000000..6138c322d7 --- /dev/null +++ b/test_runner/regress/test_compute_metrics.py @@ -0,0 +1,21 @@ +from fixtures.neon_fixtures import NeonEnv + + +def test_compute_metrics(neon_simple_env: NeonEnv): + """ + Test compute metrics, exposed in the neon_backend_perf_counters and + neon_perf_counters views + """ + env = neon_simple_env + endpoint = env.endpoints.create_start("main") + + conn = endpoint.connect() + cur = conn.cursor() + + # We don't check that the values make sense, this is just a very + # basic check that the server doesn't crash or something like that. + # + # 1.5 is the minimum version to contain these views. 
+ cur.execute("CREATE EXTENSION neon VERSION '1.5'") + cur.execute("SELECT * FROM neon_perf_counters") + cur.execute("SELECT * FROM neon_backend_perf_counters") diff --git a/test_runner/regress/test_hot_standby.py b/test_runner/regress/test_hot_standby.py index d94704012f..be8f70bb70 100644 --- a/test_runner/regress/test_hot_standby.py +++ b/test_runner/regress/test_hot_standby.py @@ -198,9 +198,6 @@ def test_hot_standby_gc(neon_env_builder: NeonEnvBuilder, pause_apply: bool): def run_pgbench(connstr: str, pg_bin: PgBin): log.info(f"Start a pgbench workload on pg {connstr}") - # s10 is about 150MB of data. In debug mode init takes about 15s on SSD. - pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s10", connstr]) - log.info("pgbench init done") pg_bin.run_capture(["pgbench", "-T60", connstr]) @@ -222,7 +219,7 @@ def pgbench_accounts_initialized(ep): # Without hs feedback enabled we'd see 'User query might have needed to see row # versions that must be removed.' errors. def test_hot_standby_feedback(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): - env = neon_env_builder.init_start() + env = neon_env_builder.init_start(initial_tenant_conf={"lsn_lease_length": "0s"}) agressive_vacuum_conf = [ "log_autovacuum_min_duration = 0", "autovacuum_naptime = 10s", @@ -247,9 +244,15 @@ def test_hot_standby_feedback(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): log.info( f"primary connstr is {primary.connstr()}, secondary connstr {secondary.connstr()}" ) + + # s10 is about 150MB of data. In debug mode init takes about 15s on SSD. + pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s10", primary.connstr()]) + log.info("pgbench init done in primary") + t = threading.Thread(target=run_pgbench, args=(primary.connstr(), pg_bin)) t.start() - # Wait until pgbench_accounts is created + filled on replica *and* + + # Wait until we see that the pgbench_accounts is created + filled on replica *and* # index is created. 
Otherwise index creation would conflict with # read queries and hs feedback won't save us. wait_until(60, 1.0, partial(pgbench_accounts_initialized, secondary)) diff --git a/test_runner/regress/test_layer_eviction.py b/test_runner/regress/test_layer_eviction.py index 193149ea03..97093ea535 100644 --- a/test_runner/regress/test_layer_eviction.py +++ b/test_runner/regress/test_layer_eviction.py @@ -173,6 +173,7 @@ def test_gc_of_remote_layers(neon_env_builder: NeonEnvBuilder): # "image_creation_threshold": set at runtime "compaction_target_size": f"{128 * (1024**2)}", # make it so that we only have 1 partition => image coverage for delta layers => enables gc of delta layers "image_layer_creation_check_threshold": "0", # always check if a new image layer can be created + "lsn_lease_length": "0s", } def tenant_update_config(changes): diff --git a/test_runner/regress/test_lfc_resize.py b/test_runner/regress/test_lfc_resize.py index cb0b30d9c6..0f791e9247 100644 --- a/test_runner/regress/test_lfc_resize.py +++ b/test_runner/regress/test_lfc_resize.py @@ -10,11 +10,11 @@ from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnv, PgBin -# -# Test branching, when a transaction is in prepared state -# @pytest.mark.timeout(600) def test_lfc_resize(neon_simple_env: NeonEnv, pg_bin: PgBin): + """ + Test resizing the Local File Cache + """ env = neon_simple_env endpoint = env.endpoints.create_start( "main", @@ -32,27 +32,48 @@ def test_lfc_resize(neon_simple_env: NeonEnv, pg_bin: PgBin): pg_bin.run_capture(["pgbench", "-i", f"-s{scale}", connstr]) pg_bin.run_capture(["pgbench", "-c10", f"-T{n_resize}", "-Mprepared", "-S", connstr]) - thread = threading.Thread(target=run_pgbench, args=(endpoint.connstr(),), daemon=True) + # Initializing the pgbench database can be very slow, especially on debug builds. 
+ connstr = endpoint.connstr(options="-cstatement_timeout=300s") + + thread = threading.Thread(target=run_pgbench, args=(connstr,), daemon=True) thread.start() conn = endpoint.connect() cur = conn.cursor() - for _ in range(n_resize): + # For as long as pgbench is running, twiddle the LFC size once a second. + # Note that we launch this immediately, already while the "pgbench -i" + # initialization step is still running. That's quite a different workload + # than the actual pgbench benchmark run, so this gives us coverage of both. + while thread.is_alive(): size = random.randint(1, 512) cur.execute(f"alter system set neon.file_cache_size_limit='{size}MB'") cur.execute("select pg_reload_conf()") time.sleep(1) - - cur.execute("alter system set neon.file_cache_size_limit='100MB'") - cur.execute("select pg_reload_conf()") - thread.join() - lfc_file_path = f"{endpoint.pg_data_dir_path()}/file.cache" - lfc_file_size = os.path.getsize(lfc_file_path) - res = subprocess.run(["ls", "-sk", lfc_file_path], check=True, text=True, capture_output=True) - lfc_file_blocks = re.findall("([0-9A-F]+)", res.stdout)[0] - log.info(f"Size of LFC file {lfc_file_size}, blocks {lfc_file_blocks}") - assert lfc_file_size <= 512 * 1024 * 1024 + # At the end, set it at 100 MB, and perform a final check that the disk usage + # of the file is in that ballpark. + # + # We retry the check a few times, because it might take a while for the + # system to react to changing the setting and shrinking the file.
+ cur.execute("alter system set neon.file_cache_size_limit='100MB'") + cur.execute("select pg_reload_conf()") + nretries = 10 + while True: + lfc_file_path = f"{endpoint.pg_data_dir_path()}/file.cache" + lfc_file_size = os.path.getsize(lfc_file_path) + res = subprocess.run( + ["ls", "-sk", lfc_file_path], check=True, text=True, capture_output=True + ) + lfc_file_blocks = re.findall("([0-9A-F]+)", res.stdout)[0] + log.info(f"Size of LFC file {lfc_file_size}, blocks {lfc_file_blocks}") + assert lfc_file_size <= 512 * 1024 * 1024 + + if int(lfc_file_blocks) <= 128 * 1024 or nretries == 0: + break + + nretries = nretries - 1 + time.sleep(1) + assert int(lfc_file_blocks) <= 128 * 1024 diff --git a/test_runner/regress/test_neon_extension.py b/test_runner/regress/test_neon_extension.py index bb844244e3..22a6013225 100644 --- a/test_runner/regress/test_neon_extension.py +++ b/test_runner/regress/test_neon_extension.py @@ -50,8 +50,8 @@ def test_neon_extension_compatibility(neon_env_builder: NeonEnvBuilder): # Ensure that the default version is also updated in the neon.control file assert cur.fetchone() == ("1.4",) cur.execute("SELECT * from neon.NEON_STAT_FILE_CACHE") - all_versions = ["1.4", "1.3", "1.2", "1.1", "1.0"] - current_version = "1.4" + all_versions = ["1.5", "1.4", "1.3", "1.2", "1.1", "1.0"] + current_version = "1.5" for idx, begin_version in enumerate(all_versions): for target_version in all_versions[idx + 1 :]: if current_version != begin_version: diff --git a/test_runner/regress/test_pageserver_generations.py b/test_runner/regress/test_pageserver_generations.py index c923713432..519994f774 100644 --- a/test_runner/regress/test_pageserver_generations.py +++ b/test_runner/regress/test_pageserver_generations.py @@ -53,6 +53,7 @@ TENANT_CONF = { # create image layers eagerly, so that GC can remove some layers "image_creation_threshold": "1", "image_layer_creation_check_threshold": "0", + "lsn_lease_length": "0s", } diff --git 
a/test_runner/regress/test_readonly_node.py b/test_runner/regress/test_readonly_node.py index 347fc3a04d..5e8b8d38f7 100644 --- a/test_runner/regress/test_readonly_node.py +++ b/test_runner/regress/test_readonly_node.py @@ -122,6 +122,7 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder): Test static endpoint is protected from GC by acquiring and renewing lsn leases. """ + LSN_LEASE_LENGTH = 8 neon_env_builder.num_pageservers = 2 # GC is manual triggered. env = neon_env_builder.init_start( @@ -139,7 +140,7 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder): "image_creation_threshold": "1", "image_layer_creation_check_threshold": "0", # Short lease length to fit test. - "lsn_lease_length": "3s", + "lsn_lease_length": f"{LSN_LEASE_LENGTH}s", }, initial_tenant_shard_count=2, ) @@ -170,10 +171,14 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder): with env.endpoints.create_start("main") as ep_main: with ep_main.cursor() as cur: cur.execute("CREATE TABLE t0(v0 int primary key, v1 text)") - lsn = None + lsn = Lsn(0) for i in range(2): lsn = generate_updates_on_main(env, ep_main, i) + # Round down to the closest LSN on page boundary (unnormalized). + XLOG_BLCKSZ = 8192 + lsn = Lsn((int(lsn) // XLOG_BLCKSZ) * XLOG_BLCKSZ) + with env.endpoints.create_start( branch_name="main", endpoint_id="static", @@ -183,7 +188,8 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder): cur.execute("SELECT count(*) FROM t0") assert cur.fetchone() == (ROW_COUNT,) - time.sleep(3) + # Wait for static compute to renew lease at least once. + time.sleep(LSN_LEASE_LENGTH / 2) generate_updates_on_main(env, ep_main, i, end=100) @@ -204,8 +210,9 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder): # Do some update so we can increment latest_gc_cutoff generate_updates_on_main(env, ep_main, i, end=100) + # Wait for the existing lease to expire. + time.sleep(LSN_LEASE_LENGTH) # Now trigger GC again, layers should be removed. 
- time.sleep(4) for shard, ps in tenant_get_shards(env, env.initial_tenant): client = ps.http_client() gc_result = client.timeline_gc(shard, env.initial_timeline, 0) diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py index 2e5260ca78..0a57fc9605 100644 --- a/test_runner/regress/test_remote_storage.py +++ b/test_runner/regress/test_remote_storage.py @@ -244,6 +244,7 @@ def test_remote_storage_upload_queue_retries( # create image layers eagerly, so that GC can remove some layers "image_creation_threshold": "1", "image_layer_creation_check_threshold": "0", + "lsn_lease_length": "0s", } ) @@ -391,6 +392,7 @@ def test_remote_timeline_client_calls_started_metric( # disable background compaction and GC. We invoke it manually when we want it to happen. "gc_period": "0s", "compaction_period": "0s", + "lsn_lease_length": "0s", } ) diff --git a/test_runner/regress/test_sharding.py b/test_runner/regress/test_sharding.py index 4a84dca399..1eb33b2d39 100644 --- a/test_runner/regress/test_sharding.py +++ b/test_runner/regress/test_sharding.py @@ -200,6 +200,7 @@ def test_sharding_split_compaction(neon_env_builder: NeonEnvBuilder, failpoint: # Disable automatic creation of image layers, as we will create them explicitly when we want them "image_creation_threshold": 9999, "image_layer_creation_check_threshold": 0, + "lsn_lease_length": "0s", } neon_env_builder.storage_controller_config = { diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py index dc90a6e9a0..3861f0b822 100644 --- a/test_runner/regress/test_storage_controller.py +++ b/test_runner/regress/test_storage_controller.py @@ -4,6 +4,7 @@ import threading import time from collections import defaultdict from datetime import datetime, timezone +from enum import Enum from typing import Any, Dict, List, Optional, Set, Tuple, Union import pytest @@ -485,7 +486,7 @@ def test_storage_controller_compute_hook( 
httpserver.expect_request("/notify", method="PUT").respond_with_handler(handler) # Start running - env = neon_env_builder.init_start() + env = neon_env_builder.init_start(initial_tenant_conf={"lsn_lease_length": "0s"}) # Initial notification from tenant creation assert len(notifications) == 1 @@ -2466,6 +2467,87 @@ def test_storage_controller_validate_during_migration(neon_env_builder: NeonEnvB raise +class MigrationFailpoints(Enum): + # While only the origin is attached + PRE_GENERATION_INC = "reconciler-live-migrate-pre-generation-inc" + # While both locations are attached + POST_NOTIFY = "reconciler-live-migrate-post-notify" + # While only the destination is attached + POST_DETACH = "reconciler-live-migrate-post-detach" + + +@pytest.mark.parametrize( + "migration_failpoint", + [ + MigrationFailpoints.PRE_GENERATION_INC, + MigrationFailpoints.POST_NOTIFY, + MigrationFailpoints.POST_DETACH, + ], +) +def test_storage_controller_proxy_during_migration( + neon_env_builder: NeonEnvBuilder, migration_failpoint: MigrationFailpoints +): + """ + If we send a proxied GET request to the controller during a migration, it should route + the request to whichever pageserver was most recently issued a generation. + + Reproducer for https://github.com/neondatabase/neon/issues/9062 + """ + neon_env_builder.num_pageservers = 2 + neon_env_builder.enable_pageserver_remote_storage(s3_storage()) + env = neon_env_builder.init_configs() + env.start() + + tenant_id = env.initial_tenant + timeline_id = env.initial_timeline + env.neon_cli.create_tenant(tenant_id, timeline_id) + + # Activate a failpoint that will cause live migration to get stuck _after_ the generation has been issued + # to the new pageserver: this should result in requests routed to the new pageserver. 
+ env.storage_controller.configure_failpoints((migration_failpoint.value, "pause")) + + origin_pageserver = env.get_tenant_pageserver(tenant_id) + dest_ps_id = [p.id for p in env.pageservers if p.id != origin_pageserver.id][0] + + try: + with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: + migrate_fut = executor.submit( + env.storage_controller.tenant_shard_migrate, + TenantShardId(tenant_id, 0, 0), + dest_ps_id, + ) + + def has_hit_migration_failpoint(): + expr = f"at failpoint {str(migration_failpoint.value)}" + log.info(expr) + assert env.storage_controller.log_contains(expr) + + wait_until(10, 1, has_hit_migration_failpoint) + + # This request should be routed to whichever pageserver holds the highest generation + tenant_info = env.storage_controller.pageserver_api().tenant_status( + tenant_id, + ) + + if migration_failpoint in ( + MigrationFailpoints.POST_NOTIFY, + MigrationFailpoints.POST_DETACH, + ): + # We expect request to land on the destination + assert tenant_info["generation"] == 2 + elif migration_failpoint == MigrationFailpoints.PRE_GENERATION_INC: + # We expect request to land on the origin + assert tenant_info["generation"] == 1 + + # Eventually migration completes + env.storage_controller.configure_failpoints((migration_failpoint.value, "off")) + migrate_fut.result() + except: + # Always disable 'pause' failpoints, even on failure, to avoid hanging in shutdown + env.storage_controller.configure_failpoints((migration_failpoint.value, "off")) + raise + + @run_only_on_default_postgres("this is like a 'unit test' against storcon db") def test_safekeeper_deployment_time_update(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_configs() diff --git a/test_runner/regress/test_storage_scrubber.py b/test_runner/regress/test_storage_scrubber.py index 848e214c5e..b6c19f03f6 100644 --- a/test_runner/regress/test_storage_scrubber.py +++ b/test_runner/regress/test_storage_scrubber.py @@ -204,6 +204,7 @@ def 
test_scrubber_physical_gc_ancestors( # No PITR, so that as soon as child shards generate an image layer, it covers ancestor deltas # and makes them GC'able "pitr_interval": "0s", + "lsn_lease_length": "0s", }, ) diff --git a/test_runner/regress/test_tenant_detach.py b/test_runner/regress/test_tenant_detach.py index b165588636..e7c6d5a4c3 100644 --- a/test_runner/regress/test_tenant_detach.py +++ b/test_runner/regress/test_tenant_detach.py @@ -266,13 +266,13 @@ def test_tenant_reattach_while_busy( def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): - env = neon_env_builder.init_start() + env = neon_env_builder.init_start(initial_tenant_conf={"lsn_lease_length": "0s"}) pageserver_http = env.pageserver.http_client() env.pageserver.allowed_errors.extend(PERMIT_PAGE_SERVICE_ERRORS) # create new nenant - tenant_id, timeline_id = env.neon_cli.create_tenant() + tenant_id, timeline_id = env.initial_tenant, env.initial_timeline # assert tenant exists on disk assert env.pageserver.tenant_dir(tenant_id).exists() diff --git a/test_runner/regress/test_timeline_gc_blocking.py b/test_runner/regress/test_timeline_gc_blocking.py index ddfe9b911f..765c72cf2a 100644 --- a/test_runner/regress/test_timeline_gc_blocking.py +++ b/test_runner/regress/test_timeline_gc_blocking.py @@ -45,7 +45,10 @@ def test_gc_blocking_by_timeline(neon_env_builder: NeonEnvBuilder, sharded: bool tenant_after = http.tenant_status(env.initial_tenant) assert tenant_before != tenant_after gc_blocking = tenant_after["gc_blocking"] - assert gc_blocking == "BlockingReasons { timelines: 1, reasons: EnumSet(Manual) }" + assert ( + gc_blocking + == "BlockingReasons { tenant_blocked_by_lsn_lease_deadline: false, timelines: 1, reasons: EnumSet(Manual) }" + ) wait_for_another_gc_round() pss.assert_log_contains(gc_skipped_line) diff --git a/test_runner/regress/test_twophase.py b/test_runner/regress/test_twophase.py index ebe65e7c29..75fab78d6e 100644 --- a/test_runner/regress/test_twophase.py +++ 
b/test_runner/regress/test_twophase.py @@ -8,6 +8,7 @@ from fixtures.neon_fixtures import ( PgBin, fork_at_current_lsn, import_timeline_from_vanilla_postgres, + wait_for_wal_insert_lsn, ) @@ -22,11 +23,6 @@ def twophase_test_on_timeline(env: NeonEnv): conn = endpoint.connect() cur = conn.cursor() - # FIXME: Switch to the next WAL segment, to work around the bug fixed in - # https://github.com/neondatabase/neon/pull/8914. When that is merged, this can be - # removed. - cur.execute("select pg_switch_wal()") - cur.execute("CREATE TABLE foo (t text)") # Prepare a transaction that will insert a row @@ -140,3 +136,28 @@ def test_twophase_nonzero_epoch( vanilla_pg.stop() # don't need the original server anymore twophase_test_on_timeline(env) + + +def test_twophase_at_wal_segment_start(neon_simple_env: NeonEnv): + """ + Same as 'test_twophase' test, but the server is started at an LSN at the beginning + of a WAL segment. We had a bug where we didn't initialize the "long XLOG page header" + at the beginning of the segment correctly, which was detected when the checkpointer + tried to read the XLOG_XACT_PREPARE record from the WAL, if that record was on the + very first page of a WAL segment and the server was started up at that first page. + """ + env = neon_simple_env + timeline_id = env.neon_cli.create_branch("test_twophase", "main") + + endpoint = env.endpoints.create_start( + "test_twophase", config_lines=["max_prepared_transactions=5"] + ) + endpoint.safe_psql("SELECT pg_switch_wal()") + + # to avoid hitting https://github.com/neondatabase/neon/issues/9079, wait for the + # WAL to reach the pageserver. 
+ wait_for_wal_insert_lsn(env, endpoint, env.initial_tenant, timeline_id) + + endpoint.stop_and_destroy() + + twophase_test_on_timeline(env) diff --git a/test_runner/regress/test_unlogged.py b/test_runner/regress/test_unlogged.py index deba29536c..4431ccd959 100644 --- a/test_runner/regress/test_unlogged.py +++ b/test_runner/regress/test_unlogged.py @@ -15,8 +15,13 @@ def test_unlogged(neon_simple_env: NeonEnv): cur = conn.cursor() cur.execute("CREATE UNLOGGED TABLE iut (id int);") - # create index to test unlogged index relation as well + # create index to test unlogged index relations as well cur.execute("CREATE UNIQUE INDEX iut_idx ON iut (id);") + cur.execute("CREATE INDEX ON iut USING gist (int4range(id, id, '[]'));") + cur.execute("CREATE INDEX ON iut USING spgist (int4range(id, id, '[]'));") + cur.execute("CREATE INDEX ON iut USING gin ((id::text::jsonb));") + cur.execute("CREATE INDEX ON iut USING brin (id);") + cur.execute("CREATE INDEX ON iut USING hash (id);") cur.execute("ALTER TABLE iut ADD COLUMN seq int GENERATED ALWAYS AS IDENTITY;") cur.execute("INSERT INTO iut (id) values (42);") @@ -39,3 +44,12 @@ def test_unlogged(neon_simple_env: NeonEnv): assert results == [(43, 2)] else: assert results == [(43, 1)] + + # Flush all data and compact it, so we detect any errors related to + # unlogged indexes materialization. 
+ ps_http = env.pageserver.http_client() + ps_http.timeline_compact( + tenant_id=env.initial_tenant, + timeline_id=env.initial_timeline, + force_image_layer_creation=True, + ) diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index 4bf8cfe88f..8ee548bdb0 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -892,6 +892,7 @@ def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): log.info(f"debug_dump before reboot {debug_dump_0}") assert debug_dump_0["timelines_count"] == 1 assert debug_dump_0["timelines"][0]["timeline_id"] == str(timeline_id) + assert debug_dump_0["timelines"][0]["wal_last_modified"] != "" endpoint.safe_psql("create table t(i int)") diff --git a/vendor/postgres-v14 b/vendor/postgres-v14 index a317b9b5b9..2199b83fb7 160000 --- a/vendor/postgres-v14 +++ b/vendor/postgres-v14 @@ -1 +1 @@ -Subproject commit a317b9b5b96978b49e78986697f3dd80d06f99a7 +Subproject commit 2199b83fb72680001ce0f43bf6187a21dfb8f45d diff --git a/vendor/postgres-v15 b/vendor/postgres-v15 index 6f6d77fb59..22e580fe9f 160000 --- a/vendor/postgres-v15 +++ b/vendor/postgres-v15 @@ -1 +1 @@ -Subproject commit 6f6d77fb5960602fcd3fd130aca9f99ecb1619c9 +Subproject commit 22e580fe9ffcea7e02592110b1c9bf426d83cada diff --git a/vendor/postgres-v16 b/vendor/postgres-v16 index 0baa7346df..e131a9c027 160000 --- a/vendor/postgres-v16 +++ b/vendor/postgres-v16 @@ -1 +1 @@ -Subproject commit 0baa7346dfd42d61912eeca554c9bb0a190f0a1e +Subproject commit e131a9c027b202ce92bd7b9cf2569d48a6f9948e diff --git a/vendor/postgres-v17 b/vendor/postgres-v17 index 9156d63ce2..68b5038f27 160000 --- a/vendor/postgres-v17 +++ b/vendor/postgres-v17 @@ -1 +1 @@ -Subproject commit 9156d63ce253bed9d1f76355ceec610e444eaffa +Subproject commit 68b5038f27e493bde6ae552fe066f10cbdfe6a14 diff --git a/vendor/revisions.json b/vendor/revisions.json index c2c34962bb..896a75814e 100644 --- 
a/vendor/revisions.json +++ b/vendor/revisions.json @@ -1,18 +1,18 @@ { "v17": [ - "17rc1", - "9156d63ce253bed9d1f76355ceec610e444eaffa" + "17.0", + "68b5038f27e493bde6ae552fe066f10cbdfe6a14" ], "v16": [ "16.4", - "0baa7346dfd42d61912eeca554c9bb0a190f0a1e" + "e131a9c027b202ce92bd7b9cf2569d48a6f9948e" ], "v15": [ "15.8", - "6f6d77fb5960602fcd3fd130aca9f99ecb1619c9" + "22e580fe9ffcea7e02592110b1c9bf426d83cada" ], "v14": [ "14.13", - "a317b9b5b96978b49e78986697f3dd80d06f99a7" + "2199b83fb72680001ce0f43bf6187a21dfb8f45d" ] } diff --git a/vm-image-spec.yaml b/vm-image-spec.yaml deleted file mode 100644 index c94f95f447..0000000000 --- a/vm-image-spec.yaml +++ /dev/null @@ -1,550 +0,0 @@ -# Supplemental file for neondatabase/autoscaling's vm-builder, for producing the VM compute image. ---- -commands: - - name: cgconfigparser - user: root - sysvInitAction: sysinit - shell: 'cgconfigparser -l /etc/cgconfig.conf -s 1664' - # restrict permissions on /neonvm/bin/resize-swap, because we grant access to compute_ctl for - # running it as root. 
- - name: chmod-resize-swap - user: root - sysvInitAction: sysinit - shell: 'chmod 711 /neonvm/bin/resize-swap' - - name: pgbouncer - user: postgres - sysvInitAction: respawn - shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini' - - name: postgres-exporter - user: nobody - sysvInitAction: respawn - shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter' - - name: sql-exporter - user: nobody - sysvInitAction: respawn - shell: '/bin/sql_exporter -config.file=/etc/sql_exporter.yml -web.listen-address=:9399' - - name: sql-exporter-autoscaling - user: nobody - sysvInitAction: respawn - shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499' -shutdownHook: | - su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10' -files: - - filename: compute_ctl-resize-swap - content: | - # Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap - # as root without requiring entering a password (NOPASSWD), regardless of hostname (ALL) - postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap - - filename: pgbouncer.ini - content: | - [databases] - *=host=localhost port=5432 auth_user=cloud_admin - [pgbouncer] - listen_port=6432 - listen_addr=0.0.0.0 - auth_type=scram-sha-256 - auth_user=cloud_admin - auth_dbname=postgres - client_tls_sslmode=disable - server_tls_sslmode=disable - pool_mode=transaction - max_client_conn=10000 - default_pool_size=64 - max_prepared_statements=0 - admin_users=postgres - unix_socket_dir=/tmp/ - unix_socket_mode=0777 - - filename: cgconfig.conf - content: | - # Configuration for cgroups in VM compute nodes - group neon-postgres { - perm { - admin { - uid = postgres; - } - task { - gid = users; - } - } - memory {} - } - - filename: sql_exporter.yml - content: | - # Configuration for sql_exporter - # Global defaults. 
- global: - # If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s. - scrape_timeout: 10s - # Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first. - scrape_timeout_offset: 500ms - # Minimum interval between collector runs: by default (0s) collectors are executed on every scrape. - min_interval: 0s - # Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections, - # as will concurrent scrapes. - max_connections: 1 - # Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should - # always be the same as max_connections. - max_idle_connections: 1 - # Maximum number of maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse. - # If 0, connections are not closed due to a connection's age. - max_connection_lifetime: 5m - - # The target to monitor and the collectors to execute on it. - target: - # Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL) - # the schema gets dropped or replaced to match the driver expected DSN format. - data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter' - - # Collectors (referenced by name) to execute on the target. - # Glob patterns are supported (see for syntax). - collectors: [neon_collector] - - # Collector files specifies a list of globs. One collector definition is read from each matching file. - # Glob patterns are supported (see for syntax). - collector_files: - - "neon_collector.yml" - - filename: sql_exporter_autoscaling.yml - content: | - # Configuration for sql_exporter for autoscaling-agent - # Global defaults. - global: - # If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s. 
- scrape_timeout: 10s - # Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first. - scrape_timeout_offset: 500ms - # Minimum interval between collector runs: by default (0s) collectors are executed on every scrape. - min_interval: 0s - # Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections, - # as will concurrent scrapes. - max_connections: 1 - # Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should - # always be the same as max_connections. - max_idle_connections: 1 - # Maximum number of maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse. - # If 0, connections are not closed due to a connection's age. - max_connection_lifetime: 5m - - # The target to monitor and the collectors to execute on it. - target: - # Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL) - # the schema gets dropped or replaced to match the driver expected DSN format. - data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter_autoscaling' - - # Collectors (referenced by name) to execute on the target. - # Glob patterns are supported (see for syntax). - collectors: [neon_collector_autoscaling] - - # Collector files specifies a list of globs. One collector definition is read from each matching file. - # Glob patterns are supported (see for syntax). 
- collector_files: - - "neon_collector_autoscaling.yml" - - filename: neon_collector.yml - content: | - collector_name: neon_collector - metrics: - - metric_name: lfc_misses - type: gauge - help: 'lfc_misses' - key_labels: - values: [lfc_misses] - query: | - select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses'; - - - metric_name: lfc_used - type: gauge - help: 'LFC chunks used (chunk = 1MB)' - key_labels: - values: [lfc_used] - query: | - select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used'; - - - metric_name: lfc_hits - type: gauge - help: 'lfc_hits' - key_labels: - values: [lfc_hits] - query: | - select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits'; - - - metric_name: lfc_writes - type: gauge - help: 'lfc_writes' - key_labels: - values: [lfc_writes] - query: | - select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes'; - - - metric_name: lfc_cache_size_limit - type: gauge - help: 'LFC cache size limit in bytes' - key_labels: - values: [lfc_cache_size_limit] - query: | - select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit; - - - metric_name: connection_counts - type: gauge - help: 'Connection counts' - key_labels: - - datname - - state - values: [count] - query: | - select datname, state, count(*) as count from pg_stat_activity where state <> '' group by datname, state; - - - metric_name: pg_stats_userdb - type: gauge - help: 'Stats for several oldest non-system dbs' - key_labels: - - datname - value_label: kind - values: - - db_size - - deadlocks - # Rows - - inserted - - updated - - deleted - # We export stats for 10 non-system database. Without this limit - # it is too easy to abuse the system by creating lots of databases. 
- query: | - select pg_database_size(datname) as db_size, deadlocks, - tup_inserted as inserted, tup_updated as updated, tup_deleted as deleted, - datname - from pg_stat_database - where datname IN ( - select datname - from pg_database - where datname <> 'postgres' and not datistemplate - order by oid - limit 10 - ); - - - metric_name: max_cluster_size - type: gauge - help: 'neon.max_cluster_size setting' - key_labels: - values: [max_cluster_size] - query: | - select setting::int as max_cluster_size from pg_settings where name = 'neon.max_cluster_size'; - - - metric_name: db_total_size - type: gauge - help: 'Size of all databases' - key_labels: - values: [total] - query: | - select sum(pg_database_size(datname)) as total from pg_database; - - # DEPRECATED - - metric_name: lfc_approximate_working_set_size - type: gauge - help: 'Approximate working set size in pages of 8192 bytes' - key_labels: - values: [approximate_working_set_size] - query: | - select neon.approximate_working_set_size(false) as approximate_working_set_size; - - - metric_name: lfc_approximate_working_set_size_windows - type: gauge - help: 'Approximate working set size in pages of 8192 bytes' - key_labels: [duration] - values: [size] - # NOTE: This is the "public" / "human-readable" version. Here, we supply a small selection - # of durations in a pretty-printed form. 
- query: | - select - x as duration, - neon.approximate_working_set_size_seconds(extract('epoch' from x::interval)::int) as size - from - (values ('5m'),('15m'),('1h')) as t (x); - - - metric_name: compute_current_lsn - type: gauge - help: 'Current LSN of the database' - key_labels: - values: [lsn] - query: | - select - case - when pg_catalog.pg_is_in_recovery() - then (pg_last_wal_replay_lsn() - '0/0')::FLOAT8 - else (pg_current_wal_lsn() - '0/0')::FLOAT8 - end as lsn; - - - metric_name: compute_receive_lsn - type: gauge - help: 'Returns the last write-ahead log location that has been received and synced to disk by streaming replication' - key_labels: - values: [lsn] - query: | - SELECT - CASE - WHEN pg_catalog.pg_is_in_recovery() - THEN (pg_last_wal_receive_lsn() - '0/0')::FLOAT8 - ELSE 0 - END AS lsn; - - - metric_name: replication_delay_bytes - type: gauge - help: 'Bytes between received and replayed LSN' - key_labels: - values: [replication_delay_bytes] - # We use a GREATEST call here because this calculation can be negative. - # The calculation is not atomic, meaning after we've gotten the receive - # LSN, the replay LSN may have advanced past the receive LSN we - # are using for the calculation. 
- query: | - SELECT GREATEST(0, pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())) AS replication_delay_bytes; - - - metric_name: replication_delay_seconds - type: gauge - help: 'Time since last LSN was replayed' - key_labels: - values: [replication_delay_seconds] - query: | - SELECT - CASE - WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN 0 - ELSE GREATEST (0, EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp())) - END AS replication_delay_seconds; - - - metric_name: checkpoints_req - type: gauge - help: 'Number of requested checkpoints' - key_labels: - values: [checkpoints_req] - query: | - SELECT checkpoints_req FROM pg_stat_bgwriter; - - - metric_name: checkpoints_timed - type: gauge - help: 'Number of scheduled checkpoints' - key_labels: - values: [checkpoints_timed] - query: | - SELECT checkpoints_timed FROM pg_stat_bgwriter; - - - metric_name: compute_logical_snapshot_files - type: gauge - help: 'Number of snapshot files in pg_logical/snapshot' - key_labels: - - timeline_id - values: [num_logical_snapshot_files] - query: | - SELECT - (SELECT setting FROM pg_settings WHERE name = 'neon.timeline_id') AS timeline_id, - -- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp. These - -- temporary snapshot files are renamed to the actual snapshot files after they are - -- completely built. We only WAL-log the completely built snapshot files. - (SELECT COUNT(*) FROM pg_ls_logicalsnapdir() WHERE name LIKE '%.snap') AS num_logical_snapshot_files; - - # In all the below metrics, we cast LSNs to floats because Prometheus only supports floats. - # It's probably fine because float64 can store integers from -2^53 to +2^53 exactly. - - # Number of slots is limited by max_replication_slots, so collecting position for all of them shouldn't be bad. 
- - metric_name: logical_slot_restart_lsn - type: gauge - help: 'restart_lsn of logical slots' - key_labels: - - slot_name - values: [restart_lsn] - query: | - select slot_name, (restart_lsn - '0/0')::FLOAT8 as restart_lsn - from pg_replication_slots - where slot_type = 'logical'; - - - metric_name: compute_subscriptions_count - type: gauge - help: 'Number of logical replication subscriptions grouped by enabled/disabled' - key_labels: - - enabled - values: [subscriptions_count] - query: | - select subenabled::text as enabled, count(*) as subscriptions_count - from pg_subscription - group by subenabled; - - - metric_name: retained_wal - type: gauge - help: 'Retained WAL in inactive replication slots' - key_labels: - - slot_name - values: [retained_wal] - query: | - SELECT slot_name, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::FLOAT8 AS retained_wal - FROM pg_replication_slots - WHERE active = false; - - - metric_name: wal_is_lost - type: gauge - help: 'Whether or not the replication slot wal_status is lost' - key_labels: - - slot_name - values: [wal_is_lost] - query: | - SELECT slot_name, - CASE - WHEN wal_status = 'lost' THEN 1 - ELSE 0 - END AS wal_is_lost - FROM pg_replication_slots; - - - filename: neon_collector_autoscaling.yml - content: | - collector_name: neon_collector_autoscaling - metrics: - - metric_name: lfc_misses - type: gauge - help: 'lfc_misses' - key_labels: - values: [lfc_misses] - query: | - select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses'; - - - metric_name: lfc_used - type: gauge - help: 'LFC chunks used (chunk = 1MB)' - key_labels: - values: [lfc_used] - query: | - select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used'; - - - metric_name: lfc_hits - type: gauge - help: 'lfc_hits' - key_labels: - values: [lfc_hits] - query: | - select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits'; - - - metric_name: lfc_writes - type: gauge - help: 
'lfc_writes' - key_labels: - values: [lfc_writes] - query: | - select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes'; - - - metric_name: lfc_cache_size_limit - type: gauge - help: 'LFC cache size limit in bytes' - key_labels: - values: [lfc_cache_size_limit] - query: | - select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit; - - - metric_name: lfc_approximate_working_set_size_windows - type: gauge - help: 'Approximate working set size in pages of 8192 bytes' - key_labels: [duration_seconds] - values: [size] - # NOTE: This is the "internal" / "machine-readable" version. This outputs the working set - # size looking back 1..60 minutes, labeled with the number of minutes. - query: | - select - x::text as duration_seconds, - neon.approximate_working_set_size_seconds(x) as size - from - (select generate_series * 60 as x from generate_series(1, 60)) as t (x); -build: | - # Build cgroup-tools - # - # At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically - # libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-monitor - # requires cgroup v2, so we'll build cgroup-tools ourselves. 
- FROM debian:bullseye-slim as libcgroup-builder - ENV LIBCGROUP_VERSION=v2.0.3 - - RUN set -exu \ - && apt update \ - && apt install --no-install-recommends -y \ - git \ - ca-certificates \ - automake \ - cmake \ - make \ - gcc \ - byacc \ - flex \ - libtool \ - libpam0g-dev \ - && git clone --depth 1 -b $LIBCGROUP_VERSION https://github.com/libcgroup/libcgroup \ - && INSTALL_DIR="/libcgroup-install" \ - && mkdir -p "$INSTALL_DIR/bin" "$INSTALL_DIR/include" \ - && cd libcgroup \ - # extracted from bootstrap.sh, with modified flags: - && (test -d m4 || mkdir m4) \ - && autoreconf -fi \ - && rm -rf autom4te.cache \ - && CFLAGS="-O3" ./configure --prefix="$INSTALL_DIR" --sysconfdir=/etc --localstatedir=/var --enable-opaque-hierarchy="name=systemd" \ - # actually build the thing... - && make install - - FROM quay.io/prometheuscommunity/postgres-exporter:v0.12.1 AS postgres-exporter - - FROM burningalchemist/sql_exporter:0.13 AS sql-exporter - - # Build pgbouncer - # - FROM debian:bullseye-slim AS pgbouncer - RUN set -e \ - && apt-get update \ - && apt-get install -y \ - build-essential \ - git \ - libevent-dev \ - libtool \ - pkg-config - - # Use `dist_man_MANS=` to skip manpage generation (which requires python3/pandoc) - ENV PGBOUNCER_TAG=pgbouncer_1_22_1 - RUN set -e \ - && git clone --recurse-submodules --depth 1 --branch ${PGBOUNCER_TAG} https://github.com/pgbouncer/pgbouncer.git pgbouncer \ - && cd pgbouncer \ - && ./autogen.sh \ - && LDFLAGS=-static ./configure --prefix=/usr/local/pgbouncer --without-openssl \ - && make -j $(nproc) dist_man_MANS= \ - && make install dist_man_MANS= -merge: | - # tweak nofile limits - RUN set -e \ - && echo 'fs.file-max = 1048576' >>/etc/sysctl.conf \ - && test ! -e /etc/security || ( \ - echo '* - nofile 1048576' >>/etc/security/limits.conf \ - && echo 'root - nofile 1048576' >>/etc/security/limits.conf \ - ) - - # Allow postgres user (compute_ctl) to run swap resizer. - # Need to install sudo in order to allow this. 
- # - # Also, remove the 'read' permission from group/other on /neonvm/bin/resize-swap, just to be safe. - RUN set -e \ - && apt update \ - && apt install --no-install-recommends -y \ - sudo \ - && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* - COPY compute_ctl-resize-swap /etc/sudoers.d/compute_ctl-resize-swap - - COPY cgconfig.conf /etc/cgconfig.conf - COPY pgbouncer.ini /etc/pgbouncer.ini - COPY sql_exporter.yml /etc/sql_exporter.yml - COPY neon_collector.yml /etc/neon_collector.yml - COPY sql_exporter_autoscaling.yml /etc/sql_exporter_autoscaling.yml - COPY neon_collector_autoscaling.yml /etc/neon_collector_autoscaling.yml - - RUN set -e \ - && chown postgres:postgres /etc/pgbouncer.ini \ - && chmod 0666 /etc/pgbouncer.ini \ - && chmod 0644 /etc/cgconfig.conf \ - && chmod 0644 /etc/sql_exporter.yml \ - && chmod 0644 /etc/neon_collector.yml \ - && chmod 0644 /etc/sql_exporter_autoscaling.yml \ - && chmod 0644 /etc/neon_collector_autoscaling.yml - - COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/ - COPY --from=libcgroup-builder /libcgroup-install/lib/* /usr/lib/ - COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/ - COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter - COPY --from=sql-exporter /bin/sql_exporter /bin/sql_exporter - COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index 662916d42c..e6d21e9434 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -45,6 +45,7 @@ futures-io = { version = "0.3" } futures-util = { version = "0.3", features = ["channel", "io", "sink"] } generic-array = { version = "0.14", default-features = false, features = ["more_lengths", "zeroize"] } getrandom = { version = "0.2", default-features = false, features = ["std"] } +half = { version = "2", default-features = false, features = ["num-traits"] } hashbrown = { version = "0.14", features = ["raw"] } 
hex = { version = "0.4", features = ["serde"] } hmac = { version = "0.12", default-features = false, features = ["reset"] } @@ -106,6 +107,7 @@ cc = { version = "1", default-features = false, features = ["parallel"] } chrono = { version = "0.4", default-features = false, features = ["clock", "serde", "wasmbind"] } either = { version = "1" } getrandom = { version = "0.2", default-features = false, features = ["std"] } +half = { version = "2", default-features = false, features = ["num-traits"] } hashbrown = { version = "0.14", features = ["raw"] } indexmap = { version = "1", default-features = false, features = ["std"] } itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12", default-features = false, features = ["use_std"] }