Merge branch 'main' into communicator-rewrite

Erik Grinaker
2025-07-17 10:59:37 +02:00
129 changed files with 5887 additions and 1063 deletions

View File

@@ -27,4 +27,4 @@
!storage_controller/
!vendor/postgres-*/
!workspace_hack/
!build_tools/patches
!build-tools/patches

View File

@@ -176,7 +176,13 @@ runs:
fi
if [[ $BUILD_TYPE == "debug" && $RUNNER_ARCH == 'X64' ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
# We don't use code coverage for regression tests (the step is disabled),
# so there's no need to collect it.
# Ref https://github.com/neondatabase/neon/issues/4540
# cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
cov_prefix=()
# Explicitly set LLVM_PROFILE_FILE to /dev/null to avoid writing *.profraw files
export LLVM_PROFILE_FILE=/dev/null
else
cov_prefix=()
fi

View File

@@ -150,7 +150,7 @@ jobs:
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
use-fallback: false
path: pg_install/v14
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}
- name: Cache postgres v15 build
id: cache_pg_15
@@ -162,7 +162,7 @@ jobs:
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
use-fallback: false
path: pg_install/v15
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}
- name: Cache postgres v16 build
id: cache_pg_16
@@ -174,7 +174,7 @@ jobs:
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
use-fallback: false
path: pg_install/v16
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}
- name: Cache postgres v17 build
id: cache_pg_17
@@ -186,7 +186,7 @@ jobs:
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
use-fallback: false
path: pg_install/v17
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}
- name: Build all
# Note: the Makefile picks up BUILD_TYPE and CARGO_PROFILE from the env variables

View File

@@ -72,7 +72,7 @@ jobs:
ARCHS: ${{ inputs.archs || '["x64","arm64"]' }}
DEBIANS: ${{ inputs.debians || '["bullseye","bookworm"]' }}
IMAGE_TAG: |
${{ hashFiles('build-tools.Dockerfile',
${{ hashFiles('build-tools/Dockerfile',
'.github/workflows/build-build-tools-image.yml') }}
run: |
echo "archs=${ARCHS}" | tee -a ${GITHUB_OUTPUT}
@@ -144,7 +144,7 @@ jobs:
- uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v6.15.0
with:
file: build-tools.Dockerfile
file: build-tools/Dockerfile
context: .
provenance: false
push: true

View File

@@ -87,22 +87,27 @@ jobs:
uses: ./.github/workflows/build-build-tools-image.yml
secrets: inherit
lint-openapi-spec:
runs-on: ubuntu-22.04
needs: [ meta, check-permissions ]
lint-yamls:
needs: [ meta, check-permissions, build-build-tools-image ]
# We do need to run this in `.*-rc-pr` because of hotfixes.
if: ${{ contains(fromJSON('["pr", "push-main", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
runs-on: [ self-hosted, small ]
container:
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
options: --init
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
with:
egress-policy: audit
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- run: make -C compute manifest-schema-validation
- run: make lint-openapi-spec
check-codestyle-python:
@@ -217,28 +222,6 @@ jobs:
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
secrets: inherit
validate-compute-manifest:
runs-on: ubuntu-22.04
needs: [ meta, check-permissions ]
# We do need to run this in `.*-rc-pr` because of hotfixes.
if: ${{ contains(fromJSON('["pr", "push-main", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
with:
egress-policy: audit
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Node.js
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
with:
node-version: '24'
- name: Validate manifest against schema
run: |
make -C compute manifest-schema-validation
build-and-test-locally:
needs: [ meta, build-build-tools-image ]
# We do need to run this in `.*-rc-pr` because of hotfixes.

.gitignore (vendored; 3 changes)
View File

@@ -30,3 +30,6 @@ docker-compose/docker-compose-parallel.yml
# pgindent typedef lists
*.list
# Node
**/node_modules/

Cargo.lock (generated; 4 changes)
View File

@@ -6400,6 +6400,7 @@ dependencies = [
"itertools 0.10.5",
"jsonwebtoken",
"metrics",
"nix 0.30.1",
"once_cell",
"pageserver_api",
"parking_lot 0.12.1",
@@ -6407,6 +6408,7 @@ dependencies = [
"postgres-protocol",
"postgres_backend",
"postgres_ffi",
"postgres_ffi_types",
"postgres_versioninfo",
"pprof",
"pq_proto",
@@ -6451,7 +6453,7 @@ dependencies = [
"anyhow",
"const_format",
"pageserver_api",
"postgres_ffi",
"postgres_ffi_types",
"postgres_versioninfo",
"pq_proto",
"serde",

View File

@@ -2,7 +2,7 @@ ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
# Where to install Postgres, default is ./pg_install, maybe useful for package
# managers.
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install
# Supported PostgreSQL versions
POSTGRES_VERSIONS = v17 v16 v15 v14
@@ -14,7 +14,7 @@ POSTGRES_VERSIONS = v17 v16 v15 v14
# it is derived from BUILD_TYPE.
# All intermediate build artifacts are stored here.
BUILD_DIR := build
BUILD_DIR := $(ROOT_PROJECT_DIR)/build
ICU_PREFIX_DIR := /usr/local/icu
@@ -212,7 +212,7 @@ neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
INDENT=$(BUILD_DIR)/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \
-C $(BUILD_DIR)/neon-v17 \
-C $(BUILD_DIR)/pgxn-v17/neon \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile pgindent
@@ -220,11 +220,15 @@ neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
setup-pre-commit-hook:
ln -s -f $(ROOT_PROJECT_DIR)/pre-commit.py .git/hooks/pre-commit
build-tools/node_modules: build-tools/package.json
cd build-tools && $(if $(CI),npm ci,npm install)
touch build-tools/node_modules
.PHONY: lint-openapi-spec
lint-openapi-spec:
lint-openapi-spec: build-tools/node_modules
# operation-2xx-response: pageserver timeline delete returns 404 on success
find . -iname "openapi_spec.y*ml" -exec\
docker run --rm -v ${PWD}:/spec ghcr.io/redocly/cli:1.34.4\
npx --prefix=build-tools/ redocly\
--skip-rule=operation-operationId --skip-rule=operation-summary --extends=minimal\
--skip-rule=no-server-example.com --skip-rule=operation-2xx-response\
lint {} \+

View File

@@ -35,7 +35,7 @@ RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
echo -e "retry_connrefused=on\ntimeout=15\ntries=5\nretry-on-host-error=on\n" > /root/.wgetrc && \
echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc
COPY build_tools/patches/pgcopydbv017.patch /pgcopydbv017.patch
COPY build-tools/patches/pgcopydbv017.patch /pgcopydbv017.patch
RUN if [ "${DEBIAN_VERSION}" = "bookworm" ]; then \
set -e && \
@@ -188,6 +188,12 @@ RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
&& bash -c 'for f in /usr/bin/clang*-${LLVM_VERSION} /usr/bin/llvm*-${LLVM_VERSION}; do ln -s "${f}" "${f%-${LLVM_VERSION}}"; done' \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Install node
ENV NODE_VERSION=24
RUN curl -fsSL https://deb.nodesource.com/setup_${NODE_VERSION}.x | bash - \
&& apt install -y nodejs \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Install docker
RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/debian ${DEBIAN_VERSION} stable" > /etc/apt/sources.list.d/docker.list \
@@ -311,14 +317,14 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
. "$HOME/.cargo/env" && \
cargo --version && rustup --version && \
rustup component add llvm-tools rustfmt clippy && \
cargo install rustfilt --version ${RUSTFILT_VERSION} --locked && \
cargo install cargo-hakari --version ${CARGO_HAKARI_VERSION} --locked && \
cargo install cargo-deny --version ${CARGO_DENY_VERSION} --locked && \
cargo install cargo-hack --version ${CARGO_HACK_VERSION} --locked && \
cargo install cargo-nextest --version ${CARGO_NEXTEST_VERSION} --locked && \
cargo install cargo-chef --version ${CARGO_CHEF_VERSION} --locked && \
cargo install diesel_cli --version ${CARGO_DIESEL_CLI_VERSION} --locked \
--features postgres-bundled --no-default-features && \
cargo install rustfilt --locked --version ${RUSTFILT_VERSION} && \
cargo install cargo-hakari --locked --version ${CARGO_HAKARI_VERSION} && \
cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
cargo install cargo-hack --locked --version ${CARGO_HACK_VERSION} && \
cargo install cargo-nextest --locked --version ${CARGO_NEXTEST_VERSION} && \
cargo install cargo-chef --locked --version ${CARGO_CHEF_VERSION} && \
cargo install diesel_cli --locked --version ${CARGO_DIESEL_CLI_VERSION} \
--features postgres-bundled --no-default-features && \
rm -rf /home/nonroot/.cargo/registry && \
rm -rf /home/nonroot/.cargo/git

build-tools/package-lock.json (generated, new file; 3189 additions)

File diff suppressed because it is too large.

build-tools/package.json (new file; 8 additions)
View File

@@ -0,0 +1,8 @@
{
"name": "build-tools",
"private": true,
"devDependencies": {
"@redocly/cli": "1.34.4",
"@sourcemeta/jsonschema": "10.0.0"
}
}

View File

@@ -50,9 +50,9 @@ jsonnetfmt-format:
jsonnetfmt --in-place $(jsonnet_files)
.PHONY: manifest-schema-validation
manifest-schema-validation: node_modules
node_modules/.bin/jsonschema validate -d https://json-schema.org/draft/2020-12/schema manifest.schema.json manifest.yaml
manifest-schema-validation: ../build-tools/node_modules
npx --prefix=../build-tools/ jsonschema validate -d https://json-schema.org/draft/2020-12/schema manifest.schema.json manifest.yaml
node_modules: package.json
npm install
touch node_modules
../build-tools/node_modules: ../build-tools/package.json
cd ../build-tools && $(if $(CI),npm ci,npm install)
touch ../build-tools/node_modules

View File

@@ -9,7 +9,7 @@
#
# build-tools: This contains Rust compiler toolchain and other tools needed at compile
# time. This is also used for the storage builds. This image is defined in
# build-tools.Dockerfile.
# build-tools/Dockerfile.
#
# build-deps: Contains C compiler, other build tools, and compile-time dependencies
# needed to compile PostgreSQL and most extensions. (Some extensions need
@@ -115,7 +115,7 @@ ARG EXTENSIONS=all
FROM $BASE_IMAGE_SHA AS build-deps
ARG DEBIAN_VERSION
# Keep in sync with build-tools.Dockerfile
# Keep in sync with build-tools/Dockerfile
ENV PROTOC_VERSION=25.1
# Use strict mode for bash to catch errors early
@@ -170,7 +170,29 @@ RUN case $DEBIAN_VERSION in \
FROM build-deps AS pg-build
ARG PG_VERSION
COPY vendor/postgres-${PG_VERSION:?} postgres
COPY compute/patches/postgres_fdw.patch .
COPY compute/patches/pg_stat_statements_pg14-16.patch .
COPY compute/patches/pg_stat_statements_pg17.patch .
RUN cd postgres && \
# Apply patches to some contrib extensions
# For example, we need to grant EXECUTE on pg_stat_statements_reset() to {privileged_role_name}.
# In vanilla Postgres this function is limited to the superuser role.
# In Neon the {privileged_role_name} role is not a superuser, but it replaces superuser in some cases.
# We could add the additional grant statements to the Postgres repository, but that would be hard to maintain
# whenever we pick up a new Postgres version, and we want to limit the changes in our Postgres fork,
# so we do it here.
case "${PG_VERSION}" in \
"v14" | "v15" | "v16") \
patch -p1 < /pg_stat_statements_pg14-16.patch; \
;; \
"v17") \
patch -p1 < /pg_stat_statements_pg17.patch; \
;; \
*) \
# So we do not forget to migrate patches to the next major version
echo "No contrib patches for this PostgreSQL version" && exit 1;; \
esac && \
patch -p1 < /postgres_fdw.patch && \
export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3 -fsigned-char' --enable-debug --with-openssl --with-uuid=ossp \
--with-icu --with-libxml --with-libxslt --with-lz4" && \
if [ "${PG_VERSION:?}" != "v14" ]; then \
@@ -184,8 +206,6 @@ RUN cd postgres && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/autoinc.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/dblink.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgres_fdw.control && \
file=/usr/local/pgsql/share/extension/postgres_fdw--1.0.sql && [ -e $file ] && \
echo 'GRANT USAGE ON FOREIGN DATA WRAPPER postgres_fdw TO neon_superuser;' >> $file && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/bloom.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/earthdistance.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/insert_username.control && \
@@ -195,34 +215,7 @@ RUN cd postgres && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrowlocks.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgstattuple.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/refint.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/xml2.control && \
# We need to grant EXECUTE on pg_stat_statements_reset() to neon_superuser.
# In vanilla postgres this function is limited to Postgres role superuser.
# In neon we have neon_superuser role that is not a superuser but replaces superuser in some cases.
# We could add the additional grant statements to the postgres repository but it would be hard to maintain,
# whenever we need to pick up a new postgres version and we want to limit the changes in our postgres fork,
# so we do it here.
for file in /usr/local/pgsql/share/extension/pg_stat_statements--*.sql; do \
filename=$(basename "$file"); \
# Note that there are no downgrade scripts for pg_stat_statements, so we \
# don't have to modify any downgrade paths or (much) older versions: we only \
# have to make sure every creation of the pg_stat_statements_reset function \
# also adds execute permissions to the neon_superuser.
case $filename in \
pg_stat_statements--1.4.sql) \
# pg_stat_statements_reset is first created with 1.4
echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO neon_superuser;' >> $file; \
;; \
pg_stat_statements--1.6--1.7.sql) \
# Then with the 1.6-1.7 migration it is re-created with a new signature, thus add the permissions back
echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO neon_superuser;' >> $file; \
;; \
pg_stat_statements--1.10--1.11.sql) \
# Then with the 1.10-1.11 migration it is re-created with a new signature again, thus add the permissions back
echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) TO neon_superuser;' >> $file; \
;; \
esac; \
done;
echo 'trusted = true' >> /usr/local/pgsql/share/extension/xml2.control
# Set PATH for all the subsequent build steps
ENV PATH="/usr/local/pgsql/bin:$PATH"
@@ -1524,7 +1517,7 @@ WORKDIR /ext-src
COPY compute/patches/pg_duckdb_v031.patch .
COPY compute/patches/duckdb_v120.patch .
# pg_duckdb build requires source dir to be a git repo to get submodules
# allow neon_superuser to execute some functions that in pg_duckdb are available to superuser only:
# allow {privileged_role_name} to execute some functions that in pg_duckdb are available to superuser only:
# - extension management function duckdb.install_extension()
# - access to duckdb.extensions table and its sequence
RUN git clone --depth 1 --branch v0.3.1 https://github.com/duckdb/pg_duckdb.git pg_duckdb-src && \
@@ -1790,7 +1783,7 @@ RUN set -e \
#########################################################################################
FROM build-deps AS exporters
ARG TARGETARCH
# Keep sql_exporter version same as in build-tools.Dockerfile and
# Keep sql_exporter version same as in build-tools/Dockerfile and
# test_runner/regress/test_compute_metrics.py
# See comment on the top of the file regarding `echo`, `-e` and `\n`
RUN if [ "$TARGETARCH" = "amd64" ]; then\

View File

@@ -1,7 +0,0 @@
{
"name": "neon-compute",
"private": true,
"dependencies": {
"@sourcemeta/jsonschema": "9.3.4"
}
}

View File

@@ -1,22 +1,26 @@
diff --git a/sql/anon.sql b/sql/anon.sql
index 0cdc769..b450327 100644
index 0cdc769..5eab1d6 100644
--- a/sql/anon.sql
+++ b/sql/anon.sql
@@ -1141,3 +1141,15 @@ $$
@@ -1141,3 +1141,19 @@ $$
-- TODO : https://en.wikipedia.org/wiki/L-diversity
-- TODO : https://en.wikipedia.org/wiki/T-closeness
+
+-- NEON Patches
+
+GRANT ALL ON SCHEMA anon to neon_superuser;
+GRANT ALL ON ALL TABLES IN SCHEMA anon TO neon_superuser;
+
+DO $$
+DECLARE
+ privileged_role_name text;
+BEGIN
+ IF current_setting('server_version_num')::int >= 150000 THEN
+ GRANT SET ON PARAMETER anon.transparent_dynamic_masking TO neon_superuser;
+ END IF;
+ privileged_role_name := current_setting('neon.privileged_role_name');
+
+ EXECUTE format('GRANT ALL ON SCHEMA anon to %I', privileged_role_name);
+ EXECUTE format('GRANT ALL ON ALL TABLES IN SCHEMA anon TO %I', privileged_role_name);
+
+ IF current_setting('server_version_num')::int >= 150000 THEN
+ EXECUTE format('GRANT SET ON PARAMETER anon.transparent_dynamic_masking TO %I', privileged_role_name);
+ END IF;
+END $$;
diff --git a/sql/init.sql b/sql/init.sql
index 7da6553..9b6164b 100644

View File

@@ -21,13 +21,21 @@ index 3235cc8..6b892bc 100644
include Makefile.global
diff --git a/sql/pg_duckdb--0.2.0--0.3.0.sql b/sql/pg_duckdb--0.2.0--0.3.0.sql
index d777d76..af60106 100644
index d777d76..3b54396 100644
--- a/sql/pg_duckdb--0.2.0--0.3.0.sql
+++ b/sql/pg_duckdb--0.2.0--0.3.0.sql
@@ -1056,3 +1056,6 @@ GRANT ALL ON FUNCTION duckdb.cache(TEXT, TEXT) TO PUBLIC;
@@ -1056,3 +1056,14 @@ GRANT ALL ON FUNCTION duckdb.cache(TEXT, TEXT) TO PUBLIC;
GRANT ALL ON FUNCTION duckdb.cache_info() TO PUBLIC;
GRANT ALL ON FUNCTION duckdb.cache_delete(TEXT) TO PUBLIC;
GRANT ALL ON PROCEDURE duckdb.recycle_ddb() TO PUBLIC;
+GRANT ALL ON FUNCTION duckdb.install_extension(TEXT) TO neon_superuser;
+GRANT ALL ON TABLE duckdb.extensions TO neon_superuser;
+GRANT ALL ON SEQUENCE duckdb.extensions_table_seq TO neon_superuser;
+
+DO $$
+DECLARE
+ privileged_role_name text;
+BEGIN
+ privileged_role_name := current_setting('neon.privileged_role_name');
+
+ EXECUTE format('GRANT ALL ON FUNCTION duckdb.install_extension(TEXT) TO %I', privileged_role_name);
+ EXECUTE format('GRANT ALL ON TABLE duckdb.extensions TO %I', privileged_role_name);
+ EXECUTE format('GRANT ALL ON SEQUENCE duckdb.extensions_table_seq TO %I', privileged_role_name);
+END $$;

View File

@@ -0,0 +1,34 @@
diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
index 58cdf600fce..8be57a996f6 100644
--- a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
@@ -46,3 +46,12 @@ GRANT SELECT ON pg_stat_statements TO PUBLIC;
-- Don't want this to be available to non-superusers.
REVOKE ALL ON FUNCTION pg_stat_statements_reset() FROM PUBLIC;
+
+DO $$
+DECLARE
+ privileged_role_name text;
+BEGIN
+ privileged_role_name := current_setting('neon.privileged_role_name');
+
+ EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO %I', privileged_role_name);
+END $$;
diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
index 6fc3fed4c93..256345a8f79 100644
--- a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
@@ -20,3 +20,12 @@ LANGUAGE C STRICT PARALLEL SAFE;
-- Don't want this to be available to non-superusers.
REVOKE ALL ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) FROM PUBLIC;
+
+DO $$
+DECLARE
+ privileged_role_name text;
+BEGIN
+ privileged_role_name := current_setting('neon.privileged_role_name');
+
+ EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO %I', privileged_role_name);
+END $$;

View File

@@ -0,0 +1,52 @@
diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql b/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql
index 0bb2c397711..32764db1d8b 100644
--- a/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql
@@ -80,3 +80,12 @@ LANGUAGE C STRICT PARALLEL SAFE;
-- Don't want this to be available to non-superusers.
REVOKE ALL ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) FROM PUBLIC;
+
+DO $$
+DECLARE
+ privileged_role_name text;
+BEGIN
+ privileged_role_name := current_setting('neon.privileged_role_name');
+
+ EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) TO %I', privileged_role_name);
+END $$;
\ No newline at end of file
diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
index 58cdf600fce..8be57a996f6 100644
--- a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
@@ -46,3 +46,12 @@ GRANT SELECT ON pg_stat_statements TO PUBLIC;
-- Don't want this to be available to non-superusers.
REVOKE ALL ON FUNCTION pg_stat_statements_reset() FROM PUBLIC;
+
+DO $$
+DECLARE
+ privileged_role_name text;
+BEGIN
+ privileged_role_name := current_setting('neon.privileged_role_name');
+
+ EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO %I', privileged_role_name);
+END $$;
diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
index 6fc3fed4c93..256345a8f79 100644
--- a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
@@ -20,3 +20,12 @@ LANGUAGE C STRICT PARALLEL SAFE;
-- Don't want this to be available to non-superusers.
REVOKE ALL ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) FROM PUBLIC;
+
+DO $$
+DECLARE
+ privileged_role_name text;
+BEGIN
+ privileged_role_name := current_setting('neon.privileged_role_name');
+
+ EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO %I', privileged_role_name);
+END $$;

View File

@@ -0,0 +1,17 @@
diff --git a/contrib/postgres_fdw/postgres_fdw--1.0.sql b/contrib/postgres_fdw/postgres_fdw--1.0.sql
index a0f0fc1bf45..ee077f2eea6 100644
--- a/contrib/postgres_fdw/postgres_fdw--1.0.sql
+++ b/contrib/postgres_fdw/postgres_fdw--1.0.sql
@@ -16,3 +16,12 @@ LANGUAGE C STRICT;
CREATE FOREIGN DATA WRAPPER postgres_fdw
HANDLER postgres_fdw_handler
VALIDATOR postgres_fdw_validator;
+
+DO $$
+DECLARE
+ privileged_role_name text;
+BEGIN
+ privileged_role_name := current_setting('neon.privileged_role_name');
+
+ EXECUTE format('GRANT USAGE ON FOREIGN DATA WRAPPER postgres_fdw TO %I', privileged_role_name);
+END $$;

View File

@@ -87,6 +87,14 @@ struct Cli {
#[arg(short = 'C', long, value_name = "DATABASE_URL")]
pub connstr: String,
#[arg(
long,
default_value = "neon_superuser",
value_name = "PRIVILEGED_ROLE_NAME",
value_parser = Self::parse_privileged_role_name
)]
pub privileged_role_name: String,
#[cfg(target_os = "linux")]
#[arg(long, default_value = "neon-postgres")]
pub cgroup: String,
@@ -149,6 +157,21 @@ impl Cli {
Ok(url)
}
/// For simplicity, we do not escape `privileged_role_name` anywhere in the code.
/// Since it's a system role, which we fully control, that's fine. Still, let's
/// validate it to avoid any surprises.
fn parse_privileged_role_name(value: &str) -> Result<String> {
use regex::Regex;
let pattern = Regex::new(r"^[a-z_]+$").unwrap();
if !pattern.is_match(value) {
bail!("--privileged-role-name can only contain lowercase letters and underscores")
}
Ok(value.to_string())
}
}
fn main() -> Result<()> {
@@ -178,6 +201,7 @@ fn main() -> Result<()> {
ComputeNodeParams {
compute_id: cli.compute_id,
connstr,
privileged_role_name: cli.privileged_role_name.clone(),
pgdata: cli.pgdata.clone(),
pgbin: cli.pgbin.clone(),
pgversion: get_pg_version_string(&cli.pgbin),
@@ -327,4 +351,49 @@ mod test {
])
.expect_err("URL parameters are not allowed");
}
#[test]
fn verify_privileged_role_name() {
// Valid name
let cli = Cli::parse_from([
"compute_ctl",
"--pgdata=test",
"--connstr=test",
"--compute-id=test",
"--privileged-role-name",
"my_superuser",
]);
assert_eq!(cli.privileged_role_name, "my_superuser");
// Invalid names
Cli::try_parse_from([
"compute_ctl",
"--pgdata=test",
"--connstr=test",
"--compute-id=test",
"--privileged-role-name",
"NeonSuperuser",
])
.expect_err("uppercase letters are not allowed");
Cli::try_parse_from([
"compute_ctl",
"--pgdata=test",
"--connstr=test",
"--compute-id=test",
"--privileged-role-name",
"$'neon_superuser",
])
.expect_err("special characters are not allowed");
Cli::try_parse_from([
"compute_ctl",
"--pgdata=test",
"--connstr=test",
"--compute-id=test",
"--privileged-role-name",
"",
])
.expect_err("empty name is not allowed");
}
}

View File

@@ -75,12 +75,20 @@ const DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL: u64 = 3600;
/// Static configuration params that don't change after startup. These mostly
/// come from the CLI args, or are derived from them.
#[derive(Clone, Debug)]
pub struct ComputeNodeParams {
/// The ID of the compute
pub compute_id: String,
// Url type maintains proper escaping
/// Url type maintains proper escaping
pub connstr: url::Url,
/// The name of the 'weak' superuser role, which we give to the users.
/// It follows the allow list approach, i.e., we take a standard role
/// and grant it extra permissions with explicit GRANTs here and there,
/// and core patches.
pub privileged_role_name: String,
pub resize_swap_on_bind: bool,
pub set_disk_quota_for_fs: Option<String>,
@@ -1316,9 +1324,7 @@ impl ComputeNode {
// In case of error, log and fail the check, but don't crash.
// We're playing it safe because these errors could be transient
// and we don't yet retry. Also being careful here allows us to
// be backwards compatible with safekeepers that don't have the
// TIMELINE_STATUS API yet.
// and we don't yet retry.
if responses.len() < quorum {
error!(
"failed sync safekeepers check {:?} {:?} {:?}",
@@ -1421,6 +1427,7 @@ impl ComputeNode {
self.create_pgdata()?;
config::write_postgres_conf(
pgdata_path,
&self.params,
&pspec.spec,
self.params.internal_http_port,
tls_config,
@@ -1761,6 +1768,7 @@ impl ComputeNode {
}
// Run migrations separately to not hold up cold starts
let params = self.params.clone();
tokio::spawn(async move {
let mut conf = conf.as_ref().clone();
conf.application_name("compute_ctl:migrations");
@@ -1772,7 +1780,7 @@ impl ComputeNode {
eprintln!("connection error: {e}");
}
});
if let Err(e) = handle_migrations(&mut client).await {
if let Err(e) = handle_migrations(params, &mut client).await {
error!("Failed to run migrations: {}", e);
}
}
@@ -1851,6 +1859,7 @@ impl ComputeNode {
let pgdata_path = Path::new(&self.params.pgdata);
config::write_postgres_conf(
pgdata_path,
&self.params,
&spec,
self.params.internal_http_port,
tls_config,
@@ -2509,7 +2518,7 @@ pub async fn installed_extensions(conf: tokio_postgres::Config) -> Result<()> {
serde_json::to_string(&extensions).expect("failed to serialize extensions list")
);
}
Err(err) => error!("could not get installed extensions: {err:?}"),
Err(err) => error!("could not get installed extensions: {err}"),
}
Ok(())
}

View File

@@ -9,6 +9,7 @@ use std::path::Path;
use compute_api::responses::TlsConfig;
use compute_api::spec::{ComputeAudit, ComputeMode, ComputeSpec, GenericOption};
use crate::compute::ComputeNodeParams;
use crate::pg_helpers::{
GenericOptionExt, GenericOptionsSearch, PgOptionsSerialize, escape_conf_value,
};
@@ -41,6 +42,7 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
/// Create or completely rewrite configuration file specified by `path`
pub fn write_postgres_conf(
pgdata_path: &Path,
params: &ComputeNodeParams,
spec: &ComputeSpec,
extension_server_port: u16,
tls_config: &Option<TlsConfig>,
@@ -203,6 +205,12 @@ pub fn write_postgres_conf(
}
}
writeln!(
file,
"neon.privileged_role_name={}",
escape_conf_value(params.privileged_role_name.as_str())
)?;
// If there are any extra options in the 'settings' field, append those
if spec.cluster.settings.is_some() {
writeln!(file, "# Managed by compute_ctl: begin")?;
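The GUC written here is what the patched extension scripts and the templated migrations elsewhere in this change read back via current_setting('neon.privileged_role_name'). Below is a minimal sketch of the emitted line, assuming escape_conf_value() produces a single-quoted postgresql.conf string (the function's exact behavior is not shown in this diff):
// Sketch only; the real escaping is done by escape_conf_value() in pg_helpers.
fn privileged_role_conf_line(role: &str) -> String {
    // Single quotes are doubled, per postgresql.conf string syntax. In practice the
    // compute_ctl CLI already restricts the role name to ^[a-z_]+$, so this is a no-op.
    format!("neon.privileged_role_name='{}'", role.replace('\'', "''"))
}
// privileged_role_conf_line("neon_superuser") == "neon.privileged_role_name='neon_superuser'"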

View File

@@ -2,6 +2,7 @@ use std::collections::HashMap;
use anyhow::Result;
use compute_api::responses::{InstalledExtension, InstalledExtensions};
use tokio_postgres::error::Error as PostgresError;
use tokio_postgres::{Client, Config, NoTls};
use crate::metrics::INSTALLED_EXTENSIONS;
@@ -10,7 +11,7 @@ use crate::metrics::INSTALLED_EXTENSIONS;
/// and to make database listing query here more explicit.
///
/// Limit the number of databases to 500 to avoid excessive load.
async fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
async fn list_dbs(client: &mut Client) -> Result<Vec<String>, PostgresError> {
// `pg_database.datconnlimit = -2` means that the database is in the
// invalid state
let databases = client
@@ -37,7 +38,9 @@ async fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
/// Same extension can be installed in multiple databases with different versions,
/// so we report a separate metric (number of databases where it is installed)
/// for each extension version.
pub async fn get_installed_extensions(mut conf: Config) -> Result<InstalledExtensions> {
pub async fn get_installed_extensions(
mut conf: Config,
) -> Result<InstalledExtensions, PostgresError> {
conf.application_name("compute_ctl:get_installed_extensions");
let databases: Vec<String> = {
let (mut client, connection) = conf.connect(NoTls).await?;
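The doc comment above gives the listing rules (skip databases whose pg_database.datconnlimit is -2, i.e. invalid, and cap the result at 500), but the query itself is outside this hunk; the sketch below is only an assumption of what list_dbs might run, not a copy of it:
// Assumed query shape; the real list_dbs() may differ in filters and ordering.
async fn list_dbs_sketch(
    client: &tokio_postgres::Client,
) -> Result<Vec<String>, tokio_postgres::Error> {
    let rows = client
        .query(
            "SELECT datname FROM pg_catalog.pg_database \
             WHERE datallowconn AND datconnlimit <> -2 \
             ORDER BY datname LIMIT 500",
            &[],
        )
        .await?;
    Ok(rows.into_iter().map(|row| row.get::<_, String>(0)).collect())
}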

View File

@@ -0,0 +1 @@
ALTER ROLE {privileged_role_name} BYPASSRLS;

View File

@@ -1 +0,0 @@
ALTER ROLE neon_superuser BYPASSRLS;

View File

@@ -15,7 +15,7 @@ DO $$
DECLARE
role_name text;
BEGIN
FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, '{privileged_role_name}', 'member')
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
@@ -23,7 +23,7 @@ BEGIN
FOR role_name IN SELECT rolname FROM pg_roles
WHERE
NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
NOT pg_has_role(rolname, '{privileged_role_name}', 'member') AND NOT starts_with(rolname, 'pg_')
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';

View File

@@ -1,6 +1,6 @@
DO $$
BEGIN
IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
EXECUTE 'GRANT pg_create_subscription TO neon_superuser';
EXECUTE 'GRANT pg_create_subscription TO {privileged_role_name}';
END IF;
END $$;

View File

@@ -1 +0,0 @@
GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION;

View File

@@ -0,0 +1 @@
GRANT pg_monitor TO {privileged_role_name} WITH ADMIN OPTION;

View File

@@ -1,4 +1,4 @@
-- SKIP: Deemed insufficient for allowing relations created by extensions to be
-- interacted with by neon_superuser without permission issues.
-- interacted with by {privileged_role_name} without permission issues.
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO {privileged_role_name};

View File

@@ -1,4 +1,4 @@
-- SKIP: Deemed insufficient for allowing relations created by extensions to be
-- interacted with by neon_superuser without permission issues.
-- interacted with by {privileged_role_name} without permission issues.
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO {privileged_role_name};

View File

@@ -1,3 +1,3 @@
-- SKIP: Moved inline to the handle_grants() functions.
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO {privileged_role_name} WITH GRANT OPTION;

View File

@@ -1,3 +1,3 @@
-- SKIP: Moved inline to the handle_grants() functions.
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO {privileged_role_name} WITH GRANT OPTION;

View File

@@ -1,7 +1,7 @@
DO $$
BEGIN
IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
EXECUTE 'GRANT EXECUTE ON FUNCTION pg_export_snapshot TO neon_superuser';
EXECUTE 'GRANT EXECUTE ON FUNCTION pg_log_standby_snapshot TO neon_superuser';
EXECUTE 'GRANT EXECUTE ON FUNCTION pg_export_snapshot TO {privileged_role_name}';
EXECUTE 'GRANT EXECUTE ON FUNCTION pg_log_standby_snapshot TO {privileged_role_name}';
END IF;
END $$;

View File

@@ -1 +0,0 @@
GRANT EXECUTE ON FUNCTION pg_show_replication_origin_status TO neon_superuser;

View File

@@ -0,0 +1 @@
GRANT EXECUTE ON FUNCTION pg_show_replication_origin_status TO {privileged_role_name};

View File

@@ -1 +0,0 @@
GRANT pg_signal_backend TO neon_superuser WITH ADMIN OPTION;

View File

@@ -0,0 +1 @@
GRANT pg_signal_backend TO {privileged_role_name} WITH ADMIN OPTION;

View File

@@ -9,6 +9,7 @@ use reqwest::StatusCode;
use tokio_postgres::Client;
use tracing::{error, info, instrument};
use crate::compute::ComputeNodeParams;
use crate::config;
use crate::metrics::{CPLANE_REQUESTS_TOTAL, CPlaneRequestRPC, UNKNOWN_HTTP_STATUS};
use crate::migration::MigrationRunner;
@@ -169,7 +170,7 @@ pub async fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> {
}
#[instrument(skip_all)]
pub async fn handle_migrations(client: &mut Client) -> Result<()> {
pub async fn handle_migrations(params: ComputeNodeParams, client: &mut Client) -> Result<()> {
info!("handle migrations");
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@@ -178,26 +179,59 @@ pub async fn handle_migrations(client: &mut Client) -> Result<()> {
// Add new migrations in numerical order.
let migrations = [
include_str!("./migrations/0001-neon_superuser_bypass_rls.sql"),
include_str!("./migrations/0002-alter_roles.sql"),
include_str!("./migrations/0003-grant_pg_create_subscription_to_neon_superuser.sql"),
include_str!("./migrations/0004-grant_pg_monitor_to_neon_superuser.sql"),
include_str!("./migrations/0005-grant_all_on_tables_to_neon_superuser.sql"),
include_str!("./migrations/0006-grant_all_on_sequences_to_neon_superuser.sql"),
include_str!(
"./migrations/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql"
&format!(
include_str!("./migrations/0001-add_bypass_rls_to_privileged_role.sql"),
privileged_role_name = params.privileged_role_name
),
include_str!(
"./migrations/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql"
&format!(
include_str!("./migrations/0002-alter_roles.sql"),
privileged_role_name = params.privileged_role_name
),
&format!(
include_str!("./migrations/0003-grant_pg_create_subscription_to_privileged_role.sql"),
privileged_role_name = params.privileged_role_name
),
&format!(
include_str!("./migrations/0004-grant_pg_monitor_to_privileged_role.sql"),
privileged_role_name = params.privileged_role_name
),
&format!(
include_str!("./migrations/0005-grant_all_on_tables_to_privileged_role.sql"),
privileged_role_name = params.privileged_role_name
),
&format!(
include_str!("./migrations/0006-grant_all_on_sequences_to_privileged_role.sql"),
privileged_role_name = params.privileged_role_name
),
&format!(
include_str!(
"./migrations/0007-grant_all_on_tables_with_grant_option_to_privileged_role.sql"
),
privileged_role_name = params.privileged_role_name
),
&format!(
include_str!(
"./migrations/0008-grant_all_on_sequences_with_grant_option_to_privileged_role.sql"
),
privileged_role_name = params.privileged_role_name
),
include_str!("./migrations/0009-revoke_replication_for_previously_allowed_roles.sql"),
include_str!(
"./migrations/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql"
&format!(
include_str!(
"./migrations/0010-grant_snapshot_synchronization_funcs_to_privileged_role.sql"
),
privileged_role_name = params.privileged_role_name
),
include_str!(
"./migrations/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql"
&format!(
include_str!(
"./migrations/0011-grant_pg_show_replication_origin_status_to_privileged_role.sql"
),
privileged_role_name = params.privileged_role_name
),
&format!(
include_str!("./migrations/0012-grant_pg_signal_backend_to_privileged_role.sql"),
privileged_role_name = params.privileged_role_name
),
include_str!("./migrations/0012-grant_pg_signal_backend_to_neon_superuser.sql"),
];
MigrationRunner::new(client, &migrations)
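The format!(include_str!(...), privileged_role_name = ...) calls above are what fill the {privileged_role_name} placeholders in the renamed migration files at runtime. A self-contained sketch of the same mechanism, with the template inlined instead of pulled in through include_str!:
fn main() {
    let privileged_role_name = "neon_superuser";
    // format! resolves {privileged_role_name} in the compile-time template
    // against the named argument, exactly as with include_str! above.
    let sql = format!(
        "ALTER ROLE {privileged_role_name} BYPASSRLS;",
        privileged_role_name = privileged_role_name
    );
    assert_eq!(sql, "ALTER ROLE neon_superuser BYPASSRLS;");
}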

View File

@@ -13,14 +13,14 @@ use tokio_postgres::Client;
use tokio_postgres::error::SqlState;
use tracing::{Instrument, debug, error, info, info_span, instrument, warn};
use crate::compute::{ComputeNode, ComputeState};
use crate::compute::{ComputeNode, ComputeNodeParams, ComputeState};
use crate::pg_helpers::{
DatabaseExt, Escaping, GenericOptionsSearch, RoleExt, get_existing_dbs_async,
get_existing_roles_async,
};
use crate::spec_apply::ApplySpecPhase::{
CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateNeonSuperuser,
CreatePgauditExtension, CreatePgauditlogtofileExtension, CreateSchemaNeon,
CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreatePgauditExtension,
CreatePgauditlogtofileExtension, CreatePrivilegedRole, CreateSchemaNeon,
DisablePostgresDBPgAudit, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions,
HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles,
RunInEachDatabase,
@@ -49,6 +49,7 @@ impl ComputeNode {
// Proceed with post-startup configuration. Note, that order of operations is important.
let client = Self::get_maintenance_client(&conf).await?;
let spec = spec.clone();
let params = Arc::new(self.params.clone());
let databases = get_existing_dbs_async(&client).await?;
let roles = get_existing_roles_async(&client)
@@ -157,6 +158,7 @@ impl ComputeNode {
let conf = Arc::new(conf);
let fut = Self::apply_spec_sql_db(
params.clone(),
spec.clone(),
conf,
ctx.clone(),
@@ -185,7 +187,7 @@ impl ComputeNode {
}
for phase in [
CreateNeonSuperuser,
CreatePrivilegedRole,
DropInvalidDatabases,
RenameRoles,
CreateAndAlterRoles,
@@ -195,6 +197,7 @@ impl ComputeNode {
] {
info!("Applying phase {:?}", &phase);
apply_operations(
params.clone(),
spec.clone(),
ctx.clone(),
jwks_roles.clone(),
@@ -243,6 +246,7 @@ impl ComputeNode {
}
let fut = Self::apply_spec_sql_db(
params.clone(),
spec.clone(),
conf,
ctx.clone(),
@@ -293,6 +297,7 @@ impl ComputeNode {
for phase in phases {
debug!("Applying phase {:?}", &phase);
apply_operations(
params.clone(),
spec.clone(),
ctx.clone(),
jwks_roles.clone(),
@@ -313,7 +318,9 @@ impl ComputeNode {
/// May opt to not connect to databases that don't have any scheduled
/// operations. The function is concurrency-controlled with the provided
/// semaphore. The caller has to make sure the semaphore isn't exhausted.
#[allow(clippy::too_many_arguments)] // TODO: needs bigger refactoring
async fn apply_spec_sql_db(
params: Arc<ComputeNodeParams>,
spec: Arc<ComputeSpec>,
conf: Arc<tokio_postgres::Config>,
ctx: Arc<tokio::sync::RwLock<MutableApplyContext>>,
@@ -328,6 +335,7 @@ impl ComputeNode {
for subphase in subphases {
apply_operations(
params.clone(),
spec.clone(),
ctx.clone(),
jwks_roles.clone(),
@@ -467,7 +475,7 @@ pub enum PerDatabasePhase {
#[derive(Clone, Debug)]
pub enum ApplySpecPhase {
CreateNeonSuperuser,
CreatePrivilegedRole,
DropInvalidDatabases,
RenameRoles,
CreateAndAlterRoles,
@@ -510,6 +518,7 @@ pub struct MutableApplyContext {
/// - No timeouts have (yet) been implemented.
/// - The caller is responsible for limiting and/or applying concurrency.
pub async fn apply_operations<'a, Fut, F>(
params: Arc<ComputeNodeParams>,
spec: Arc<ComputeSpec>,
ctx: Arc<RwLock<MutableApplyContext>>,
jwks_roles: Arc<HashSet<String>>,
@@ -527,7 +536,7 @@ where
debug!("Processing phase {:?}", &apply_spec_phase);
let ctx = ctx;
let mut ops = get_operations(&spec, &ctx, &jwks_roles, &apply_spec_phase)
let mut ops = get_operations(&params, &spec, &ctx, &jwks_roles, &apply_spec_phase)
.await?
.peekable();
@@ -588,14 +597,18 @@ where
/// sort/merge/batch execution, but for now this is a nice way to improve
/// batching behavior of the commands.
async fn get_operations<'a>(
params: &'a ComputeNodeParams,
spec: &'a ComputeSpec,
ctx: &'a RwLock<MutableApplyContext>,
jwks_roles: &'a HashSet<String>,
apply_spec_phase: &'a ApplySpecPhase,
) -> Result<Box<dyn Iterator<Item = Operation> + 'a + Send>> {
match apply_spec_phase {
ApplySpecPhase::CreateNeonSuperuser => Ok(Box::new(once(Operation {
query: include_str!("sql/create_neon_superuser.sql").to_string(),
ApplySpecPhase::CreatePrivilegedRole => Ok(Box::new(once(Operation {
query: format!(
include_str!("sql/create_privileged_role.sql"),
privileged_role_name = params.privileged_role_name
),
comment: None,
}))),
ApplySpecPhase::DropInvalidDatabases => {
@@ -697,8 +710,9 @@ async fn get_operations<'a>(
None => {
let query = if !jwks_roles.contains(role.name.as_str()) {
format!(
"CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser {}",
"CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE {} {}",
role.name.pg_quote(),
params.privileged_role_name,
role.to_pg_options(),
)
} else {
@@ -849,8 +863,9 @@ async fn get_operations<'a>(
// ALL PRIVILEGES grants CREATE, CONNECT, and TEMPORARY on the database
// (see https://www.postgresql.org/docs/current/ddl-priv.html)
query: format!(
"GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser",
db.name.pg_quote()
"GRANT ALL PRIVILEGES ON DATABASE {} TO {}",
db.name.pg_quote(),
params.privileged_role_name
),
comment: None,
},

View File

@@ -1,8 +0,0 @@
DO $$
BEGIN
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'neon_superuser')
THEN
CREATE ROLE neon_superuser CREATEDB CREATEROLE NOLOGIN REPLICATION BYPASSRLS IN ROLE pg_read_all_data, pg_write_all_data;
END IF;
END
$$;

View File

@@ -0,0 +1,8 @@
DO $$
BEGIN
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{privileged_role_name}')
THEN
CREATE ROLE {privileged_role_name} CREATEDB CREATEROLE NOLOGIN REPLICATION BYPASSRLS IN ROLE pg_read_all_data, pg_write_all_data;
END IF;
END
$$;

View File

@@ -631,6 +631,10 @@ struct EndpointCreateCmdArgs {
help = "Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but useful for tests."
)]
allow_multiple: bool,
/// Only allow changing it on creation
#[clap(long, help = "Name of the privileged role for the endpoint")]
privileged_role_name: Option<String>,
}
#[derive(clap::Args)]
@@ -1480,6 +1484,7 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
args.grpc,
!args.update_catalog,
false,
args.privileged_role_name.clone(),
)?;
}
EndpointCmd::Start(args) => {

View File

@@ -102,6 +102,7 @@ pub struct EndpointConf {
features: Vec<ComputeFeature>,
cluster: Option<Cluster>,
compute_ctl_config: ComputeCtlConfig,
privileged_role_name: Option<String>,
}
//
@@ -202,6 +203,7 @@ impl ComputeControlPlane {
grpc: bool,
skip_pg_catalog_updates: bool,
drop_subscriptions_before_start: bool,
privileged_role_name: Option<String>,
) -> Result<Arc<Endpoint>> {
let pg_port = pg_port.unwrap_or_else(|| self.get_port());
let external_http_port = external_http_port.unwrap_or_else(|| self.get_port() + 1);
@@ -239,6 +241,7 @@ impl ComputeControlPlane {
features: vec![],
cluster: None,
compute_ctl_config: compute_ctl_config.clone(),
privileged_role_name: privileged_role_name.clone(),
});
ep.create_endpoint_dir()?;
@@ -260,6 +263,7 @@ impl ComputeControlPlane {
features: vec![],
cluster: None,
compute_ctl_config,
privileged_role_name,
})?,
)?;
std::fs::write(
@@ -335,6 +339,9 @@ pub struct Endpoint {
/// The compute_ctl config for the endpoint's compute.
compute_ctl_config: ComputeCtlConfig,
/// The name of the privileged role for the endpoint.
privileged_role_name: Option<String>,
}
#[derive(PartialEq, Eq)]
@@ -435,6 +442,7 @@ impl Endpoint {
features: conf.features,
cluster: conf.cluster,
compute_ctl_config: conf.compute_ctl_config,
privileged_role_name: conf.privileged_role_name,
})
}
@@ -467,7 +475,7 @@ impl Endpoint {
conf.append("max_connections", "100");
conf.append("wal_level", "logical");
// wal_sender_timeout is the maximum time to wait for WAL replication.
// It also defines how often the walreciever will send a feedback message to the wal sender.
// It also defines how often the walreceiver will send a feedback message to the wal sender.
conf.append("wal_sender_timeout", "5s");
conf.append("listen_addresses", &self.pg_address.ip().to_string());
conf.append("port", &self.pg_address.port().to_string());
@@ -862,6 +870,10 @@ impl Endpoint {
cmd.arg("--dev");
}
if let Some(privileged_role_name) = self.privileged_role_name.clone() {
cmd.args(["--privileged-role-name", &privileged_role_name]);
}
let child = cmd.spawn()?;
// set up a scopeguard to kill & wait for the child in case we panic or bail below
let child = scopeguard::guard(child, |mut child| {

View File

@@ -476,6 +476,7 @@ async fn main() -> anyhow::Result<()> {
listen_http_port,
listen_https_port,
availability_zone_id: AvailabilityZone(availability_zone_id),
node_ip_addr: None,
}),
)
.await?;

View File

@@ -75,7 +75,7 @@ CLI examples:
* AWS S3 : `env AWS_ACCESS_KEY_ID='SOMEKEYAAAAASADSAH*#' AWS_SECRET_ACCESS_KEY='SOMEsEcReTsd292v' ${PAGESERVER_BIN} -c "remote_storage={bucket_name='some-sample-bucket',bucket_region='eu-north-1', prefix_in_bucket='/test_prefix/'}"`
For Amazon AWS S3, a key id and secret access key could be located in `~/.aws/credentials` if awscli was ever configured to work with the desired bucket, or on the AWS Settings page for a certain user. Also note that the bucket names do not contain any protocols when used on AWS.
For local S3 installations, refer to the their documentation for name format and credentials.
For local S3 installations, refer to their documentation for name format and credentials.
Similar to other pageserver settings, toml config file can be used to configure either of the storages as backup targets.
Required sections are:

View File

@@ -1,5 +1,6 @@
use std::collections::{HashMap, HashSet};
use std::fmt::Display;
use std::net::IpAddr;
use std::str::FromStr;
use std::time::{Duration, Instant};
@@ -60,6 +61,11 @@ pub struct NodeRegisterRequest {
pub listen_https_port: Option<u16>,
pub availability_zone_id: AvailabilityZone,
// Reachable IP address of the PS/SK registering, if known.
// Hadron Cluster Coordinator will update the DNS record of the registering node
// with this IP address.
pub node_ip_addr: Option<IpAddr>,
}
#[derive(Serialize, Deserialize)]
@@ -545,6 +551,39 @@ pub struct SafekeeperDescribeResponse {
pub scheduling_policy: SkSchedulingPolicy,
}
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct TimelineSafekeeperPeer {
pub node_id: NodeId,
pub listen_http_addr: String,
pub http_port: i32,
}
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct SCSafekeeperTimeline {
// SC does not know the tenant id.
pub timeline_id: TimelineId,
pub peers: Vec<NodeId>,
}
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct SCSafekeeperTimelinesResponse {
pub timelines: Vec<SCSafekeeperTimeline>,
pub safekeeper_peers: Vec<TimelineSafekeeperPeer>,
}
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct SafekeeperTimeline {
pub tenant_id: TenantId,
pub timeline_id: TimelineId,
pub peers: Vec<NodeId>,
}
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct SafekeeperTimelinesResponse {
pub timelines: Vec<SafekeeperTimeline>,
pub safekeeper_peers: Vec<TimelineSafekeeperPeer>,
}
#[derive(Serialize, Deserialize, Clone)]
pub struct SafekeeperSchedulingPolicyRequest {
pub scheduling_policy: SkSchedulingPolicy,

View File

@@ -110,7 +110,6 @@ fn main() -> anyhow::Result<()> {
.allowlist_type("XLogRecPtr")
.allowlist_type("XLogSegNo")
.allowlist_type("TimeLineID")
.allowlist_type("TimestampTz")
.allowlist_type("MultiXactId")
.allowlist_type("MultiXactOffset")
.allowlist_type("MultiXactStatus")

View File

@@ -227,8 +227,7 @@ pub mod walrecord;
// Export some widely used datatypes that are unlikely to change across Postgres versions
pub use v14::bindings::{
BlockNumber, CheckPoint, ControlFileData, MultiXactId, OffsetNumber, Oid, PageHeaderData,
RepOriginId, TimeLineID, TimestampTz, TransactionId, XLogRecPtr, XLogRecord, XLogSegNo, uint32,
uint64,
RepOriginId, TimeLineID, TransactionId, XLogRecPtr, XLogRecord, XLogSegNo, uint32, uint64,
};
// Likewise for these, although the assumption that these don't change is a little more iffy.
pub use v14::bindings::{MultiXactOffset, MultiXactStatus};

View File

@@ -4,13 +4,14 @@
//! TODO: Generate separate types for each supported PG version
use bytes::{Buf, Bytes};
use postgres_ffi_types::TimestampTz;
use serde::{Deserialize, Serialize};
use utils::bin_ser::DeserializeError;
use utils::lsn::Lsn;
use crate::{
BLCKSZ, BlockNumber, MultiXactId, MultiXactOffset, MultiXactStatus, Oid, PgMajorVersion,
RepOriginId, TimestampTz, TransactionId, XLOG_SIZE_OF_XLOG_RECORD, XLogRecord, pg_constants,
RepOriginId, TransactionId, XLOG_SIZE_OF_XLOG_RECORD, XLogRecord, pg_constants,
};
#[repr(C)]
@@ -863,7 +864,8 @@ pub mod v17 {
XlHeapDelete, XlHeapInsert, XlHeapLock, XlHeapMultiInsert, XlHeapUpdate, XlParameterChange,
rm_neon,
};
pub use crate::{TimeLineID, TimestampTz};
pub use crate::TimeLineID;
pub use postgres_ffi_types::TimestampTz;
#[repr(C)]
#[derive(Debug)]

View File

@@ -9,10 +9,11 @@
use super::super::waldecoder::WalStreamDecoder;
use super::bindings::{
CheckPoint, ControlFileData, DBState_DB_SHUTDOWNED, FullTransactionId, TimeLineID, TimestampTz,
CheckPoint, ControlFileData, DBState_DB_SHUTDOWNED, FullTransactionId, TimeLineID,
XLogLongPageHeaderData, XLogPageHeaderData, XLogRecPtr, XLogRecord, XLogSegNo, XLOG_PAGE_MAGIC,
MY_PGVERSION
};
use postgres_ffi_types::TimestampTz;
use super::wal_generator::LogicalMessageGenerator;
use crate::pg_constants;
use crate::PG_TLI;

View File

@@ -11,3 +11,4 @@ pub mod forknum;
pub type Oid = u32;
pub type RepOriginId = u16;
pub type TimestampTz = i64;

View File

@@ -9,7 +9,7 @@ anyhow.workspace = true
const_format.workspace = true
serde.workspace = true
serde_json.workspace = true
postgres_ffi.workspace = true
postgres_ffi_types.workspace = true
postgres_versioninfo.workspace = true
pq_proto.workspace = true
tokio.workspace = true

View File

@@ -3,7 +3,7 @@
use std::net::SocketAddr;
use pageserver_api::shard::ShardIdentity;
use postgres_ffi::TimestampTz;
use postgres_ffi_types::TimestampTz;
use postgres_versioninfo::PgVersionId;
use serde::{Deserialize, Serialize};
use tokio::time::Instant;

View File

@@ -0,0 +1,73 @@
use std::env::{VarError, var};
use std::error::Error;
use std::net::IpAddr;
use std::str::FromStr;
/// Name of the environment variable containing the reachable IP address of the node. If set, the IP address contained in this
/// environment variable is used as the reachable IP address of the pageserver or safekeeper node during node registration.
/// In a Kubernetes environment, this environment variable should be set by Kubernetes to the Pod IP (specified in the Pod
/// template).
pub const HADRON_NODE_IP_ADDRESS: &str = "HADRON_NODE_IP_ADDRESS";
/// Read the reachable IP address of this page server from env var HADRON_NODE_IP_ADDRESS.
/// In Kubernetes this environment variable is set to the Pod IP (specified in the Pod template).
pub fn read_node_ip_addr_from_env() -> Result<Option<IpAddr>, Box<dyn Error>> {
match var(HADRON_NODE_IP_ADDRESS) {
Ok(v) => {
if let Ok(addr) = IpAddr::from_str(&v) {
Ok(Some(addr))
} else {
Err(format!("Invalid IP address string: {v}. Cannot be parsed as either an IPv4 or an IPv6 address.").into())
}
}
Err(VarError::NotPresent) => Ok(None),
Err(e) => Err(e.into()),
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::env;
use std::net::{Ipv4Addr, Ipv6Addr};
#[test]
fn test_read_node_ip_addr_from_env() {
// SAFETY: test code
unsafe {
// Test with a valid IPv4 address
env::set_var(HADRON_NODE_IP_ADDRESS, "192.168.1.1");
let result = read_node_ip_addr_from_env().unwrap();
assert_eq!(result, Some(IpAddr::V4(Ipv4Addr::new(192, 168, 1, 1))));
// Test with a valid IPv6 address
env::set_var(
HADRON_NODE_IP_ADDRESS,
"2001:0db8:85a3:0000:0000:8a2e:0370:7334",
);
}
let result = read_node_ip_addr_from_env().unwrap();
assert_eq!(
result,
Some(IpAddr::V6(
Ipv6Addr::from_str("2001:0db8:85a3:0000:0000:8a2e:0370:7334").unwrap()
))
);
// Test with an invalid IP address
// SAFETY: test code
unsafe {
env::set_var(HADRON_NODE_IP_ADDRESS, "invalid_ip");
}
let result = read_node_ip_addr_from_env();
assert!(result.is_err());
// Test with no environment variable set
// SAFETY: test code
unsafe {
env::remove_var(HADRON_NODE_IP_ADDRESS);
}
let result = read_node_ip_addr_from_env().unwrap();
assert_eq!(result, None);
}
}
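Neither registration call in this merge uses the helper yet (both still pass node_ip_addr: None), so the wiring below is purely hypothetical; it only shows how a pageserver or safekeeper could resolve the optional address once at startup, with the fail-soft error handling being an assumption:
use std::net::IpAddr;

// Hypothetical use of the helper above; not part of this change.
fn resolve_node_ip_addr() -> Option<IpAddr> {
    match utils::ip_address::read_node_ip_addr_from_env() {
        Ok(addr) => addr,
        Err(err) => {
            // Assumed policy: log and continue, since registration also works without an IP.
            eprintln!("ignoring invalid HADRON_NODE_IP_ADDRESS: {err}");
            None
        }
    }
}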

View File

@@ -26,6 +26,9 @@ pub mod auth;
// utility functions and helper traits for unified unique id generation/serialization etc.
pub mod id;
// utility functions to obtain reachable IP addresses in PS/SK nodes.
pub mod ip_address;
pub mod shard;
mod hex;

View File

@@ -2,7 +2,8 @@
use bytes::Bytes;
use postgres_ffi::walrecord::{MultiXactMember, describe_postgres_wal_record};
use postgres_ffi::{MultiXactId, MultiXactOffset, TimestampTz, TransactionId};
use postgres_ffi::{MultiXactId, MultiXactOffset, TransactionId};
use postgres_ffi_types::TimestampTz;
use serde::{Deserialize, Serialize};
use utils::bin_ser::DeserializeError;

View File

@@ -431,7 +431,7 @@ pub fn empty_shmem() -> crate::bindings::WalproposerShmemState {
let empty_wal_rate_limiter = crate::bindings::WalRateLimiter {
should_limit: crate::bindings::pg_atomic_uint32 { value: 0 },
sent_bytes: 0,
last_recorded_time_us: 0,
last_recorded_time_us: crate::bindings::pg_atomic_uint64 { value: 0 },
};
crate::bindings::WalproposerShmemState {

View File

@@ -194,6 +194,7 @@ impl StorageControllerUpcallApi for StorageControllerUpcallClient {
listen_http_port: m.http_port,
listen_https_port: m.https_port,
availability_zone_id: az_id.expect("Checked above"),
node_ip_addr: None,
})
}
Err(e) => {

View File

@@ -1,5 +1,5 @@
//! The validator is responsible for validating DeletionLists for execution,
//! based on whethe the generation in the DeletionList is still the latest
//! based on whether the generation in the DeletionList is still the latest
//! generation for a tenant.
//!
//! The purpose of validation is to ensure split-brain safety in the cluster

View File

@@ -10,6 +10,7 @@ use std::sync::Arc;
use std::time::Duration;
use anyhow::{Context, Result, anyhow};
use bytes::Bytes;
use enumset::EnumSet;
use futures::future::join_all;
use futures::{StreamExt, TryFutureExt};
@@ -46,6 +47,7 @@ use pageserver_api::shard::{ShardCount, TenantShardId};
use postgres_ffi::PgMajorVersion;
use remote_storage::{DownloadError, GenericRemoteStorage, TimeTravelError};
use scopeguard::defer;
use serde::{Deserialize, Serialize};
use serde_json::json;
use tenant_size_model::svg::SvgBranchKind;
use tenant_size_model::{SizeResult, StorageModel};
@@ -57,6 +59,7 @@ use utils::auth::SwappableJwtAuth;
use utils::generation::Generation;
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
use wal_decoder::models::record::NeonWalRecord;
use crate::config::PageServerConf;
use crate::context;
@@ -77,6 +80,7 @@ use crate::tenant::remote_timeline_client::{
};
use crate::tenant::secondary::SecondaryController;
use crate::tenant::size::ModelInputs;
use crate::tenant::storage_layer::ValuesReconstructState;
use crate::tenant::storage_layer::{IoConcurrency, LayerAccessStatsReset, LayerName};
use crate::tenant::timeline::layer_manager::LayerManagerLockHolder;
use crate::tenant::timeline::offload::{OffloadError, offload_timeline};
@@ -2353,6 +2357,7 @@ async fn timeline_compact_handler(
flags,
sub_compaction,
sub_compaction_max_job_size_mb,
gc_compaction_do_metadata_compaction: false,
};
let scheduled = compact_request
@@ -2708,6 +2713,16 @@ async fn deletion_queue_flush(
}
}
/// Try if `GetPage@Lsn` is successful, useful for manual debugging.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
struct GetPageResponse {
pub page: Bytes,
pub layers_visited: u32,
pub delta_layers_visited: u32,
pub records: Vec<(Lsn, NeonWalRecord)>,
pub img: Option<(Lsn, Bytes)>,
}
async fn getpage_at_lsn_handler(
request: Request<Body>,
cancel: CancellationToken,
@@ -2758,21 +2773,24 @@ async fn getpage_at_lsn_handler_inner(
// Use last_record_lsn if no lsn is provided
let lsn = lsn.unwrap_or_else(|| timeline.get_last_record_lsn());
let page = timeline.get(key.0, lsn, &ctx).await?;
if touch {
json_response(StatusCode::OK, ())
} else {
Result::<_, ApiError>::Ok(
Response::builder()
.status(StatusCode::OK)
.header(header::CONTENT_TYPE, "application/octet-stream")
.body(hyper::Body::from(page))
.unwrap(),
)
let mut reconstruct_state = ValuesReconstructState::new_with_debug(IoConcurrency::sequential());
let page = timeline.debug_get(key.0, lsn, &ctx, &mut reconstruct_state).await?;
let response = GetPageResponse {
page,
layers_visited: reconstruct_state.get_layers_visited(),
delta_layers_visited: reconstruct_state.get_delta_layers_visited(),
records: reconstruct_state.debug_state.records.clone(),
img: reconstruct_state.debug_state.img.clone(),
};
json_response(StatusCode::OK, response)
}
}
.instrument(info_span!("timeline_get", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
.instrument(info_span!("timeline_debug_get", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
.await
}
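As a rough illustration, a debugging script could inspect the new JSON response without depending on pageserver-internal types; the exact debug route is not shown here and is assumed, so only the `GetPageResponse` field names introduced above are relied upon.
use serde_json::Value;
// Summarize a GetPageResponse body captured from the debug getpage endpoint.
fn summarize_getpage_response(body: &str) -> Result<String, serde_json::Error> {
    let v: Value = serde_json::from_str(body)?;
    let layers = v["layers_visited"].as_u64().unwrap_or(0);
    let deltas = v["delta_layers_visited"].as_u64().unwrap_or(0);
    let records = v["records"].as_array().map(|r| r.len()).unwrap_or(0);
    let has_img = !v["img"].is_null();
    Ok(format!(
        "visited {layers} layers ({deltas} delta), {records} WAL records, base image present: {has_img}"
    ))
}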

View File

@@ -25,9 +25,9 @@ use pageserver_api::keyspace::{KeySpaceRandomAccum, SparseKeySpace};
use pageserver_api::models::RelSizeMigration;
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
use pageserver_api::shard::ShardIdentity;
use postgres_ffi::{BLCKSZ, PgMajorVersion, TimestampTz, TransactionId};
use postgres_ffi::{BLCKSZ, PgMajorVersion, TransactionId};
use postgres_ffi_types::forknum::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
use postgres_ffi_types::{Oid, RepOriginId};
use postgres_ffi_types::{Oid, RepOriginId, TimestampTz};
use serde::{Deserialize, Serialize};
use strum::IntoEnumIterator;
use tokio_util::sync::CancellationToken;

View File

@@ -9216,7 +9216,11 @@ mod tests {
let cancel = CancellationToken::new();
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
@@ -9299,7 +9303,11 @@ mod tests {
guard.cutoffs.space = Lsn(0x40);
}
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
@@ -9836,7 +9844,11 @@ mod tests {
let cancel = CancellationToken::new();
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
@@ -9871,7 +9883,11 @@ mod tests {
guard.cutoffs.space = Lsn(0x40);
}
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
@@ -10446,7 +10462,7 @@ mod tests {
&cancel,
CompactOptions {
flags: dryrun_flags,
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -10457,14 +10473,22 @@ mod tests {
verify_result().await;
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
verify_result().await;
// compact again
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
verify_result().await;
@@ -10483,14 +10507,22 @@ mod tests {
guard.cutoffs.space = Lsn(0x38);
}
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
verify_result().await; // no wals between 0x30 and 0x38, so we should obtain the same result
// not increasing the GC horizon and compact again
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
verify_result().await;
@@ -10695,7 +10727,7 @@ mod tests {
&cancel,
CompactOptions {
flags: dryrun_flags,
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -10706,14 +10738,22 @@ mod tests {
verify_result().await;
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
verify_result().await;
// compact again
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
verify_result().await;
@@ -10913,7 +10953,11 @@ mod tests {
let cancel = CancellationToken::new();
branch_tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
@@ -10926,7 +10970,7 @@ mod tests {
&cancel,
CompactOptions {
compact_lsn_range: Some(CompactLsnRange::above(Lsn(0x40))),
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -11594,7 +11638,7 @@ mod tests {
CompactOptions {
flags: EnumSet::new(),
compact_key_range: Some((get_key(0)..get_key(2)).into()),
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -11641,7 +11685,7 @@ mod tests {
CompactOptions {
flags: EnumSet::new(),
compact_key_range: Some((get_key(2)..get_key(4)).into()),
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -11693,7 +11737,7 @@ mod tests {
CompactOptions {
flags: EnumSet::new(),
compact_key_range: Some((get_key(4)..get_key(9)).into()),
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -11744,7 +11788,7 @@ mod tests {
CompactOptions {
flags: EnumSet::new(),
compact_key_range: Some((get_key(9)..get_key(10)).into()),
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -11800,7 +11844,7 @@ mod tests {
CompactOptions {
flags: EnumSet::new(),
compact_key_range: Some((get_key(0)..get_key(10)).into()),
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -12071,7 +12115,7 @@ mod tests {
&cancel,
CompactOptions {
compact_lsn_range: Some(CompactLsnRange::above(Lsn(0x28))),
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -12106,7 +12150,11 @@ mod tests {
// compact again
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
verify_result().await;
@@ -12325,7 +12373,7 @@ mod tests {
CompactOptions {
compact_key_range: Some((get_key(0)..get_key(2)).into()),
compact_lsn_range: Some((Lsn(0x20)..Lsn(0x28)).into()),
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -12371,7 +12419,7 @@ mod tests {
CompactOptions {
compact_key_range: Some((get_key(3)..get_key(8)).into()),
compact_lsn_range: Some((Lsn(0x28)..Lsn(0x40)).into()),
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -12419,7 +12467,7 @@ mod tests {
CompactOptions {
compact_key_range: Some((get_key(0)..get_key(5)).into()),
compact_lsn_range: Some((Lsn(0x20)..Lsn(0x50)).into()),
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)
@@ -12454,7 +12502,11 @@ mod tests {
// final full compaction
tline
.compact_with_gc(&cancel, CompactOptions::default(), &ctx)
.compact_with_gc(
&cancel,
CompactOptions::default_for_gc_compaction_unit_tests(),
&ctx,
)
.await
.unwrap();
verify_result().await;
@@ -12564,7 +12616,7 @@ mod tests {
CompactOptions {
compact_key_range: None,
compact_lsn_range: None,
..Default::default()
..CompactOptions::default_for_gc_compaction_unit_tests()
},
&ctx,
)

View File

@@ -75,7 +75,7 @@ where
/// the same ValueReconstructState struct in the next 'get_value_reconstruct_data'
/// call, to collect more records.
///
#[derive(Debug, Default)]
#[derive(Debug, Default, Clone)]
pub(crate) struct ValueReconstructState {
pub(crate) records: Vec<(Lsn, NeonWalRecord)>,
pub(crate) img: Option<(Lsn, Bytes)>,
@@ -308,6 +308,9 @@ pub struct ValuesReconstructState {
layers_visited: u32,
delta_layers_visited: u32,
pub(crate) enable_debug: bool,
pub(crate) debug_state: ValueReconstructState,
pub(crate) io_concurrency: IoConcurrency,
num_active_ios: Arc<AtomicUsize>,
@@ -657,6 +660,23 @@ impl ValuesReconstructState {
layers_visited: 0,
delta_layers_visited: 0,
io_concurrency,
enable_debug: false,
debug_state: ValueReconstructState::default(),
num_active_ios: Arc::new(AtomicUsize::new(0)),
read_path: None,
}
}
pub(crate) fn new_with_debug(io_concurrency: IoConcurrency) -> Self {
Self {
keys: HashMap::new(),
keys_done: KeySpaceRandomAccum::new(),
keys_with_image_coverage: None,
layers_visited: 0,
delta_layers_visited: 0,
io_concurrency,
enable_debug: true,
debug_state: ValueReconstructState::default(),
num_active_ios: Arc::new(AtomicUsize::new(0)),
read_path: None,
}
@@ -670,6 +690,12 @@ impl ValuesReconstructState {
self.io_concurrency.spawn_io(fut).await;
}
pub(crate) fn set_debug_state(&mut self, debug_state: &ValueReconstructState) {
if self.enable_debug {
self.debug_state = debug_state.clone();
}
}
pub(crate) fn on_layer_visited(&mut self, layer: &ReadableLayer) {
self.layers_visited += 1;
if let ReadableLayer::PersistentLayer(layer) = layer {

View File

@@ -939,6 +939,20 @@ pub(crate) struct CompactOptions {
/// Set job size for the GC compaction.
/// This option is only used by GC compaction.
pub sub_compaction_max_job_size_mb: Option<u64>,
/// Only for GC compaction.
/// If set, the compaction will compact the metadata layers. Should be only set to true in unit tests
/// because metadata compaction is not fully supported yet.
pub gc_compaction_do_metadata_compaction: bool,
}
impl CompactOptions {
#[cfg(test)]
pub fn default_for_gc_compaction_unit_tests() -> Self {
Self {
gc_compaction_do_metadata_compaction: true,
..Default::default()
}
}
}
impl std::fmt::Debug for Timeline {
@@ -1253,6 +1267,57 @@ impl Timeline {
}
}
#[inline(always)]
pub(crate) async fn debug_get(
&self,
key: Key,
lsn: Lsn,
ctx: &RequestContext,
reconstruct_state: &mut ValuesReconstructState,
) -> Result<Bytes, PageReconstructError> {
if !lsn.is_valid() {
return Err(PageReconstructError::Other(anyhow::anyhow!("Invalid LSN")));
}
// This check is debug-only because of the cost of hashing, and because it's a double-check: we
// already checked the key against the shard_identity when looking up the Timeline from
// page_service.
debug_assert!(!self.shard_identity.is_key_disposable(&key));
let query = VersionedKeySpaceQuery::uniform(KeySpace::single(key..key.next()), lsn);
let vectored_res = self
.debug_get_vectored_impl(query, reconstruct_state, ctx)
.await;
let key_value = vectored_res?.pop_first();
match key_value {
Some((got_key, value)) => {
if got_key != key {
error!(
"Expected {}, but singular vectored get returned {}",
key, got_key
);
Err(PageReconstructError::Other(anyhow!(
"Singular vectored get returned wrong key"
)))
} else {
value
}
}
None => Err(PageReconstructError::MissingKey(Box::new(
MissingKeyError {
keyspace: KeySpace::single(key..key.next()),
shard: self.shard_identity.get_shard_number(&key),
original_hwm_lsn: lsn,
ancestor_lsn: None,
backtrace: None,
read_path: None,
query: None,
},
))),
}
}
pub(crate) const LAYERS_VISITED_WARN_THRESHOLD: u32 = 100;
/// Look up multiple page versions at a given LSN
@@ -1547,6 +1612,98 @@ impl Timeline {
Ok(results)
}
// A copy of the get_vectored_impl method, except that we store the image and WAL records into `reconstruct_state`.
// This is only used in the HTTP getpage call for debugging purposes.
pub(super) async fn debug_get_vectored_impl(
&self,
query: VersionedKeySpaceQuery,
reconstruct_state: &mut ValuesReconstructState,
ctx: &RequestContext,
) -> Result<BTreeMap<Key, Result<Bytes, PageReconstructError>>, GetVectoredError> {
if query.is_empty() {
return Ok(BTreeMap::default());
}
let read_path = if self.conf.enable_read_path_debugging || ctx.read_path_debug() {
Some(ReadPath::new(
query.total_keyspace(),
query.high_watermark_lsn()?,
))
} else {
None
};
reconstruct_state.read_path = read_path;
let traversal_res: Result<(), _> = self
.get_vectored_reconstruct_data(query.clone(), reconstruct_state, ctx)
.await;
if let Err(err) = traversal_res {
// Wait for all the spawned IOs to complete.
// See comments on `spawn_io` inside `storage_layer` for more details.
let mut collect_futs = std::mem::take(&mut reconstruct_state.keys)
.into_values()
.map(|state| state.collect_pending_ios())
.collect::<FuturesUnordered<_>>();
while collect_futs.next().await.is_some() {}
return Err(err);
};
let reconstruct_state = Arc::new(Mutex::new(reconstruct_state));
let futs = FuturesUnordered::new();
for (key, state) in std::mem::take(&mut reconstruct_state.lock().unwrap().keys) {
let req_lsn_for_key = query.map_key_to_lsn(&key);
futs.push({
let walredo_self = self.myself.upgrade().expect("&self method holds the arc");
let rc_clone = Arc::clone(&reconstruct_state);
async move {
assert_eq!(state.situation, ValueReconstructSituation::Complete);
let converted = match state.collect_pending_ios().await {
Ok(ok) => ok,
Err(err) => {
return (key, Err(err));
}
};
DELTAS_PER_READ_GLOBAL.observe(converted.num_deltas() as f64);
// The walredo module expects the records to be descending in terms of Lsn.
// And we submit the IOs in that order, so there should be no need to sort here.
debug_assert!(
converted
.records
.is_sorted_by_key(|(lsn, _)| std::cmp::Reverse(*lsn)),
"{converted:?}"
);
{
let mut guard = rc_clone.lock().unwrap();
guard.set_debug_state(&converted);
}
(
key,
walredo_self
.reconstruct_value(
key,
req_lsn_for_key,
converted,
RedoAttemptType::ReadPage,
)
.await,
)
}
});
}
let results = futs
.collect::<BTreeMap<Key, Result<Bytes, PageReconstructError>>>()
.await;
Ok(results)
}
/// Get last or prev record separately. Same as get_last_record_rlsn().last/prev.
pub(crate) fn get_last_record_lsn(&self) -> Lsn {
self.last_record_lsn.load().last
@@ -2042,6 +2199,7 @@ impl Timeline {
compact_lsn_range: None,
sub_compaction: false,
sub_compaction_max_job_size_mb: None,
gc_compaction_do_metadata_compaction: false,
},
ctx,
)

View File

@@ -396,6 +396,7 @@ impl GcCompactionQueue {
}),
compact_lsn_range: None,
sub_compaction_max_job_size_mb: None,
gc_compaction_do_metadata_compaction: false,
},
permit,
);
@@ -512,6 +513,7 @@ impl GcCompactionQueue {
compact_key_range: Some(job.compact_key_range.into()),
compact_lsn_range: Some(job.compact_lsn_range.into()),
sub_compaction_max_job_size_mb: None,
gc_compaction_do_metadata_compaction: false,
};
pending_tasks.push(GcCompactionQueueItem::SubCompactionJob {
options,
@@ -785,6 +787,8 @@ pub(crate) struct GcCompactJob {
/// as specified here. The true range being compacted is `min_lsn/max_lsn` in [`GcCompactionJobDescription`].
/// min_lsn will always <= the lower bound specified here, and max_lsn will always >= the upper bound specified here.
pub compact_lsn_range: Range<Lsn>,
/// See [`CompactOptions::gc_compaction_do_metadata_compaction`].
pub do_metadata_compaction: bool,
}
impl GcCompactJob {
@@ -799,6 +803,7 @@ impl GcCompactJob {
.compact_lsn_range
.map(|x| x.into())
.unwrap_or(Lsn::INVALID..Lsn::MAX),
do_metadata_compaction: options.gc_compaction_do_metadata_compaction,
}
}
}
@@ -3174,6 +3179,7 @@ impl Timeline {
dry_run: job.dry_run,
compact_key_range: start..end,
compact_lsn_range: job.compact_lsn_range.start..compact_below_lsn,
do_metadata_compaction: false,
});
current_start = Some(end);
}
@@ -3236,7 +3242,7 @@ impl Timeline {
async fn compact_with_gc_inner(
self: &Arc<Self>,
cancel: &CancellationToken,
job: GcCompactJob,
mut job: GcCompactJob,
ctx: &RequestContext,
yield_for_l0: bool,
) -> Result<CompactionOutcome, CompactionError> {
@@ -3244,6 +3250,28 @@ impl Timeline {
// with legacy compaction tasks in the future. Always ensure the lock order is compaction -> gc.
// Note that we already acquired the compaction lock when the outer `compact` function gets called.
// If the job is not configured to compact the metadata key range, shrink the key range
// to exclude the metadata key range. This is done by checking whether the end of the key range
// is greater than the start of the metadata key range. Note that metadata keys cover the entire
// second half of the keyspace, so it's enough to only check the end of the key range.
if !job.do_metadata_compaction
&& job.compact_key_range.end > Key::metadata_key_range().start
{
tracing::info!(
"compaction for metadata key range is not supported yet, overriding compact_key_range from {} to {}",
job.compact_key_range.end,
Key::metadata_key_range().start
);
// Shrink the key range to exclude the metadata key range.
job.compact_key_range.end = Key::metadata_key_range().start;
// Skip the job if the key range completely lies within the metadata key range.
if job.compact_key_range.start >= job.compact_key_range.end {
tracing::info!("compact_key_range is empty, skipping compaction");
return Ok(CompactionOutcome::Done);
}
}
let timer = Instant::now();
let begin_timer = timer;

View File

@@ -184,7 +184,7 @@ pub(super) async fn connection_manager_loop_step(
// If we've not received any updates from the broker from a while, are waiting for WAL
// and have no safekeeper connection or connection candidates, then it might be that
// the broker subscription is wedged. Drop the currrent subscription and re-subscribe
// the broker subscription is wedged. Drop the current subscription and re-subscribe
// with the goal of unblocking it.
_ = broker_reset_interval.tick() => {
let awaiting_lsn = wait_lsn_status.borrow().is_some();
@@ -192,7 +192,7 @@ pub(super) async fn connection_manager_loop_step(
let no_connection = connection_manager_state.wal_connection.is_none();
if awaiting_lsn && no_candidates && no_connection {
tracing::warn!("No broker updates received for a while, but waiting for WAL. Re-setting stream ...");
tracing::info!("No broker updates received for a while, but waiting for WAL. Re-setting stream ...");
broker_subscription = subscribe_for_timeline_updates(broker_client, id, cancel).await?;
}
},

View File

@@ -1,6 +1,6 @@
//! An utilization metric which is used to decide on which pageserver to put next tenant.
//!
//! The metric is exposed via `GET /v1/utilization`. Refer and maintain it's openapi spec as the
//! The metric is exposed via `GET /v1/utilization`. Refer and maintain its openapi spec as the
//! truth.
use std::path::Path;

View File

@@ -32,9 +32,10 @@ use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
use pageserver_api::shard::ShardIdentity;
use postgres_ffi::walrecord::*;
use postgres_ffi::{
PgMajorVersion, TimestampTz, TransactionId, dispatch_pgversion, enum_pgversion,
enum_pgversion_dispatch, fsm_logical_to_physical, pg_constants,
PgMajorVersion, TransactionId, dispatch_pgversion, enum_pgversion, enum_pgversion_dispatch,
fsm_logical_to_physical, pg_constants,
};
use postgres_ffi_types::TimestampTz;
use postgres_ffi_types::forknum::{FSM_FORKNUM, INIT_FORKNUM, MAIN_FORKNUM, VISIBILITYMAP_FORKNUM};
use tracing::*;
use utils::bin_ser::{DeserializeError, SerializeError};
@@ -1069,7 +1070,7 @@ impl WalIngest {
// NB: In PostgreSQL, the next-multi-xid stored in the control file is allowed to
// go to 0, and it's fixed up by skipping to FirstMultiXactId in functions that
// read it, like GetNewMultiXactId(). This is different from how nextXid is
// incremented! nextXid skips over < FirstNormalTransactionId when the the value
// incremented! nextXid skips over < FirstNormalTransactionId when the value
// is stored, so it's never 0 in a checkpoint.
//
// I don't know why it's done that way, it seems less error-prone to skip over 0

View File

@@ -421,7 +421,7 @@ check_getpage_response(PrefetchRequest* slot, NeonResponse* resp)
{
if (resp->tag != T_NeonGetPageResponse && resp->tag != T_NeonErrorResponse)
{
neon_shard_log(slot->shard_no, PANIC, "Unexpected prefetch response %d, ring_receive=%ld, ring_flush=%ld, ring_unused=%ld",
neon_shard_log(slot->shard_no, PANIC, "Unexpected prefetch response %d, ring_receive=" UINT64_FORMAT ", ring_flush=" UINT64_FORMAT ", ring_unused=" UINT64_FORMAT "",
resp->tag, MyPState->ring_receive, MyPState->ring_flush, MyPState->ring_unused);
}
if (neon_protocol_version >= 3)
@@ -438,7 +438,7 @@ check_getpage_response(PrefetchRequest* slot, NeonResponse* resp)
getpage_resp->req.blkno != slot->buftag.blockNum)
{
NEON_PANIC_CONNECTION_STATE(slot->shard_no, PANIC,
"Receive unexpected getpage response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u, block=%u} to get page request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u, block=%u}",
"Receive unexpected getpage response {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u, block=%u} to get page request {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u, block=%u}",
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), RelFileInfoFmt(getpage_resp->req.rinfo), getpage_resp->req.forknum, getpage_resp->req.blkno,
slot->reqid, LSN_FORMAT_ARGS(slot->request_lsns.request_lsn), LSN_FORMAT_ARGS(slot->request_lsns.not_modified_since), RelFileInfoFmt(rinfo), slot->buftag.forkNum, slot->buftag.blockNum);
}
@@ -447,7 +447,7 @@ check_getpage_response(PrefetchRequest* slot, NeonResponse* resp)
resp->lsn != slot->request_lsns.request_lsn ||
resp->not_modified_since != slot->request_lsns.not_modified_since)
{
elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match exists request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}",
elog(WARNING, NEON_TAG "Error message {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X} doesn't match exists request {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X}",
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),
slot->reqid, LSN_FORMAT_ARGS(slot->request_lsns.request_lsn), LSN_FORMAT_ARGS(slot->request_lsns.not_modified_since));
}
@@ -496,9 +496,9 @@ communicator_prefetch_pump_state(void)
slot->my_ring_index != MyPState->ring_receive)
{
neon_shard_log(slot->shard_no, PANIC,
"Incorrect prefetch slot state after receive: status=%d response=%p my=%lu receive=%lu",
"Incorrect prefetch slot state after receive: status=%d response=%p my=" UINT64_FORMAT " receive=" UINT64_FORMAT "",
slot->status, slot->response,
(long) slot->my_ring_index, (long) MyPState->ring_receive);
slot->my_ring_index, MyPState->ring_receive);
}
/* update prefetch state */
MyPState->n_responses_buffered += 1;
@@ -789,9 +789,9 @@ prefetch_read(PrefetchRequest *slot)
slot->my_ring_index != MyPState->ring_receive)
{
neon_shard_log(slot->shard_no, PANIC,
"Incorrect prefetch read: status=%d response=%p my=%lu receive=%lu",
"Incorrect prefetch read: status=%d response=%p my=" UINT64_FORMAT " receive=" UINT64_FORMAT "",
slot->status, slot->response,
(long)slot->my_ring_index, (long)MyPState->ring_receive);
slot->my_ring_index, MyPState->ring_receive);
}
/*
@@ -816,9 +816,9 @@ prefetch_read(PrefetchRequest *slot)
slot->my_ring_index != MyPState->ring_receive)
{
neon_shard_log(shard_no, PANIC,
"Incorrect prefetch slot state after receive: status=%d response=%p my=%lu receive=%lu",
"Incorrect prefetch slot state after receive: status=%d response=%p my=" UINT64_FORMAT " receive=" UINT64_FORMAT "",
slot->status, slot->response,
(long) slot->my_ring_index, (long) MyPState->ring_receive);
slot->my_ring_index, MyPState->ring_receive);
}
/* update prefetch state */
@@ -852,8 +852,8 @@ prefetch_read(PrefetchRequest *slot)
* and the prefetch queue was flushed during the receive call
*/
neon_shard_log(shard_no, LOG,
"No response from reading prefetch entry %lu: %u/%u/%u.%u block %u. This can be caused by a concurrent disconnect",
(long) my_ring_index,
"No response from reading prefetch entry " UINT64_FORMAT ": %u/%u/%u.%u block %u. This can be caused by a concurrent disconnect",
my_ring_index,
RelFileInfoFmt(BufTagGetNRelFileInfo(buftag)),
buftag.forkNum, buftag.blockNum);
return false;
@@ -1844,7 +1844,7 @@ nm_to_string(NeonMessage *msg)
NeonDbSizeResponse *msg_resp = (NeonDbSizeResponse *) msg;
appendStringInfoString(&s, "{\"type\": \"NeonDbSizeResponse\"");
appendStringInfo(&s, ", \"db_size\": %ld}",
appendStringInfo(&s, ", \"db_size\": " INT64_FORMAT "}",
msg_resp->db_size);
appendStringInfoChar(&s, '}');
@@ -2045,7 +2045,7 @@ communicator_exists(NRelFileInfo rinfo, ForkNumber forkNum, neon_request_lsns *r
exists_resp->req.forknum != request.forknum)
{
NEON_PANIC_CONNECTION_STATE(0, PANIC,
"Unexpect response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u} to exits request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u}",
"Unexpect response {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u} to exits request {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u}",
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), RelFileInfoFmt(exists_resp->req.rinfo), exists_resp->req.forknum,
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), RelFileInfoFmt(request.rinfo), request.forknum);
}
@@ -2058,14 +2058,14 @@ communicator_exists(NRelFileInfo rinfo, ForkNumber forkNum, neon_request_lsns *r
{
if (!equal_requests(resp, &request.hdr))
{
elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match exists request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}",
elog(WARNING, NEON_TAG "Error message {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X} doesn't match exists request {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X}",
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since));
}
}
ereport(ERROR,
(errcode(ERRCODE_IO_ERROR),
errmsg(NEON_TAG "[reqid %lx] could not read relation existence of rel %u/%u/%u.%u from page server at lsn %X/%08X",
errmsg(NEON_TAG "[reqid " UINT64_HEX_FORMAT "] could not read relation existence of rel %u/%u/%u.%u from page server at lsn %X/%08X",
resp->reqid,
RelFileInfoFmt(rinfo),
forkNum,
@@ -2241,7 +2241,7 @@ Retry:
case T_NeonErrorResponse:
ereport(ERROR,
(errcode(ERRCODE_IO_ERROR),
errmsg(NEON_TAG "[shard %d, reqid %lx] could not read block %u in rel %u/%u/%u.%u from page server at lsn %X/%08X",
errmsg(NEON_TAG "[shard %d, reqid " UINT64_HEX_FORMAT "] could not read block %u in rel %u/%u/%u.%u from page server at lsn %X/%08X",
slot->shard_no, resp->reqid, blockno, RelFileInfoFmt(rinfo),
forkNum, LSN_FORMAT_ARGS(reqlsns->effective_request_lsn)),
errdetail("page server returned error: %s",
@@ -2294,7 +2294,7 @@ communicator_nblocks(NRelFileInfo rinfo, ForkNumber forknum, neon_request_lsns *
relsize_resp->req.forknum != forknum)
{
NEON_PANIC_CONNECTION_STATE(0, PANIC,
"Unexpect response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u} to get relsize request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u}",
"Unexpect response {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u} to get relsize request {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u}",
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), RelFileInfoFmt(relsize_resp->req.rinfo), relsize_resp->req.forknum,
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), RelFileInfoFmt(request.rinfo), forknum);
}
@@ -2307,14 +2307,14 @@ communicator_nblocks(NRelFileInfo rinfo, ForkNumber forknum, neon_request_lsns *
{
if (!equal_requests(resp, &request.hdr))
{
elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match get relsize request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}",
elog(WARNING, NEON_TAG "Error message {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X} doesn't match get relsize request {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X}",
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since));
}
}
ereport(ERROR,
(errcode(ERRCODE_IO_ERROR),
errmsg(NEON_TAG "[reqid %lx] could not read relation size of rel %u/%u/%u.%u from page server at lsn %X/%08X",
errmsg(NEON_TAG "[reqid " UINT64_HEX_FORMAT "] could not read relation size of rel %u/%u/%u.%u from page server at lsn %X/%08X",
resp->reqid,
RelFileInfoFmt(rinfo),
forknum,
@@ -2364,7 +2364,7 @@ communicator_dbsize(Oid dbNode, neon_request_lsns *request_lsns)
dbsize_resp->req.dbNode != dbNode)
{
NEON_PANIC_CONNECTION_STATE(0, PANIC,
"Unexpect response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, dbNode=%u} to get DB size request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, dbNode=%u}",
"Unexpect response {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X, dbNode=%u} to get DB size request {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X, dbNode=%u}",
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), dbsize_resp->req.dbNode,
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), dbNode);
}
@@ -2377,14 +2377,14 @@ communicator_dbsize(Oid dbNode, neon_request_lsns *request_lsns)
{
if (!equal_requests(resp, &request.hdr))
{
elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match get DB size request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}",
elog(WARNING, NEON_TAG "Error message {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X} doesn't match get DB size request {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X}",
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since));
}
}
ereport(ERROR,
(errcode(ERRCODE_IO_ERROR),
errmsg(NEON_TAG "[reqid %lx] could not read db size of db %u from page server at lsn %X/%08X",
errmsg(NEON_TAG "[reqid " UINT64_HEX_FORMAT "] could not read db size of db %u from page server at lsn %X/%08X",
resp->reqid,
dbNode, LSN_FORMAT_ARGS(request_lsns->effective_request_lsn)),
errdetail("page server returned error: %s",
@@ -2455,7 +2455,7 @@ communicator_read_slru_segment(SlruKind kind, int64 segno, neon_request_lsns *re
slru_resp->req.segno != segno)
{
NEON_PANIC_CONNECTION_STATE(0, PANIC,
"Unexpect response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, kind=%u, segno=%u} to get SLRU segment request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, kind=%u, segno=%lluu}",
"Unexpect response {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X, kind=%u, segno=%u} to get SLRU segment request {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X, kind=%u, segno=%lluu}",
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), slru_resp->req.kind, slru_resp->req.segno,
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), kind, (unsigned long long) segno);
}
@@ -2469,14 +2469,14 @@ communicator_read_slru_segment(SlruKind kind, int64 segno, neon_request_lsns *re
{
if (!equal_requests(resp, &request.hdr))
{
elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match get SLRU segment request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}",
elog(WARNING, NEON_TAG "Error message {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X} doesn't match get SLRU segment request {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X}",
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since));
}
}
ereport(ERROR,
(errcode(ERRCODE_IO_ERROR),
errmsg(NEON_TAG "[reqid %lx] could not read SLRU %d segment %llu at lsn %X/%08X",
errmsg(NEON_TAG "[reqid " UINT64_HEX_FORMAT "] could not read SLRU %d segment %llu at lsn %X/%08X",
resp->reqid,
kind,
(unsigned long long) segno,

View File

@@ -565,6 +565,15 @@ _PG_init(void)
PGC_POSTMASTER,
0,
NULL, NULL, NULL);
DefineCustomStringVariable(
"neon.privileged_role_name",
"Name of the 'weak' superuser role, which we give to the users",
NULL,
&privileged_role_name,
"neon_superuser",
PGC_POSTMASTER, 0, NULL, NULL, NULL);
/*
* Important: This must happen after other parts of the extension are
* loaded, otherwise any settings to GUCs that were set before the

View File

@@ -17,7 +17,6 @@ extern bool neon_enable_new_communicator;
extern char *neon_auth_token;
extern char *neon_timeline;
extern char *neon_tenant;
extern char *wal_acceptors_list;
extern int wal_acceptor_reconnect_timeout;
extern int wal_acceptor_connection_timeout;

View File

@@ -13,7 +13,7 @@
* accumulate changes. On subtransaction commit, the top of the stack
* is merged with the table below it.
*
* Support event triggers for neon_superuser
* Support event triggers for {privileged_role_name}
*
* IDENTIFICATION
* contrib/neon/neon_dll_handler.c
@@ -49,6 +49,7 @@
#include "neon_ddl_handler.h"
#include "neon_utils.h"
#include "neon.h"
static ProcessUtility_hook_type PreviousProcessUtilityHook = NULL;
static fmgr_hook_type next_fmgr_hook = NULL;
@@ -541,11 +542,11 @@ NeonXactCallback(XactEvent event, void *arg)
}
static bool
RoleIsNeonSuperuser(const char *role_name)
IsPrivilegedRole(const char *role_name)
{
Assert(role_name);
return strcmp(role_name, "neon_superuser") == 0;
return strcmp(role_name, privileged_role_name) == 0;
}
static void
@@ -578,8 +579,9 @@ HandleCreateDb(CreatedbStmt *stmt)
{
const char *owner_name = defGetString(downer);
if (RoleIsNeonSuperuser(owner_name))
elog(ERROR, "can't create a database with owner neon_superuser");
if (IsPrivilegedRole(owner_name))
elog(ERROR, "could not create a database with owner %s", privileged_role_name);
entry->owner = get_role_oid(owner_name, false);
}
else
@@ -609,8 +611,9 @@ HandleAlterOwner(AlterOwnerStmt *stmt)
memset(entry->old_name, 0, sizeof(entry->old_name));
new_owner = get_rolespec_name(stmt->newowner);
if (RoleIsNeonSuperuser(new_owner))
elog(ERROR, "can't alter owner to neon_superuser");
if (IsPrivilegedRole(new_owner))
elog(ERROR, "could not alter owner to %s", privileged_role_name);
entry->owner = get_role_oid(new_owner, false);
entry->type = Op_Set;
}
@@ -716,8 +719,8 @@ HandleAlterRole(AlterRoleStmt *stmt)
InitRoleTableIfNeeded();
role_name = get_rolespec_name(stmt->role);
if (RoleIsNeonSuperuser(role_name) && !superuser())
elog(ERROR, "can't ALTER neon_superuser");
if (IsPrivilegedRole(role_name) && !superuser())
elog(ERROR, "could not ALTER %s", privileged_role_name);
dpass = NULL;
foreach(option, stmt->options)
@@ -831,7 +834,7 @@ HandleRename(RenameStmt *stmt)
*
* In vanilla only superuser can create Event Triggers.
*
* We allow it for neon_superuser by temporary switching to superuser. But as
* We allow it for {privileged_role_name} by temporary switching to superuser. But as
* far as event trigger can fire in superuser context we should protect
* superuser from execution of arbitrary user's code.
*
@@ -891,7 +894,7 @@ force_noop(FmgrInfo *finfo)
* Also skip executing Event Triggers when GUC neon.event_triggers has been
* set to false. This might be necessary to be able to connect again after a
* LOGIN Event Trigger has been installed that would prevent connections as
* neon_superuser.
* {privileged_role_name}.
*/
static void
neon_fmgr_hook(FmgrHookEventType event, FmgrInfo *flinfo, Datum *private)
@@ -910,24 +913,24 @@ neon_fmgr_hook(FmgrHookEventType event, FmgrInfo *flinfo, Datum *private)
}
/*
* The neon_superuser role can use the GUC neon.event_triggers to disable
* The {privileged_role_name} role can use the GUC neon.event_triggers to disable
* firing Event Trigger.
*
* SET neon.event_triggers TO false;
*
* This only applies to the neon_superuser role though, and only allows
* skipping Event Triggers owned by neon_superuser, which we check by
* proxy of the Event Trigger function being owned by neon_superuser.
* This only applies to the {privileged_role_name} role though, and only allows
* skipping Event Triggers owned by {privileged_role_name}, which we check by
* proxy of the Event Trigger function being owned by {privileged_role_name}.
*
* A role that is created in role neon_superuser should be allowed to also
* A role that is created in role {privileged_role_name} should be allowed to also
* benefit from the neon_event_triggers GUC, and will be considered the
* same as the neon_superuser role.
* same as the {privileged_role_name} role.
*/
if (event == FHET_START
&& !neon_event_triggers
&& is_neon_superuser())
&& is_privileged_role())
{
Oid neon_superuser_oid = get_role_oid("neon_superuser", false);
Oid weak_superuser_oid = get_role_oid(privileged_role_name, false);
/* Find the Function Attributes (owner Oid, security definer) */
const char *fun_owner_name = NULL;
@@ -937,8 +940,8 @@ neon_fmgr_hook(FmgrHookEventType event, FmgrInfo *flinfo, Datum *private)
LookupFuncOwnerSecDef(flinfo->fn_oid, &fun_owner, &fun_is_secdef);
fun_owner_name = GetUserNameFromId(fun_owner, false);
if (RoleIsNeonSuperuser(fun_owner_name)
|| has_privs_of_role(fun_owner, neon_superuser_oid))
if (IsPrivilegedRole(fun_owner_name)
|| has_privs_of_role(fun_owner, weak_superuser_oid))
{
elog(WARNING,
"Skipping Event Trigger: neon.event_triggers is false");
@@ -1149,13 +1152,13 @@ ProcessCreateEventTrigger(
}
/*
* Allow neon_superuser to create Event Trigger, while keeping the
* Allow {privileged_role_name} to create Event Trigger, while keeping the
* ownership of the object.
*
* For that we give superuser membership to the role for the execution of
* the command.
*/
if (IsTransactionState() && is_neon_superuser())
if (IsTransactionState() && is_privileged_role())
{
/* Find the Event Trigger function Oid */
Oid func_oid = LookupFuncName(stmt->funcname, 0, NULL, false);
@@ -1232,7 +1235,7 @@ ProcessCreateEventTrigger(
*
* That way [ ALTER | DROP ] EVENT TRIGGER commands just work.
*/
if (IsTransactionState() && is_neon_superuser())
if (IsTransactionState() && is_privileged_role())
{
if (!current_user_is_super)
{
@@ -1352,19 +1355,17 @@ NeonProcessUtility(
}
/*
* Only neon_superuser is granted privilege to edit neon.event_triggers GUC.
* Only {privileged_role_name} is granted privilege to edit neon.event_triggers GUC.
*/
static void
neon_event_triggers_assign_hook(bool newval, void *extra)
{
/* MyDatabaseId == InvalidOid || !OidIsValid(GetUserId()) */
if (IsTransactionState() && !is_neon_superuser())
if (IsTransactionState() && !is_privileged_role())
{
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("permission denied to set neon.event_triggers"),
errdetail("Only \"neon_superuser\" is allowed to set the GUC")));
errdetail("Only \"%s\" is allowed to set the GUC", privileged_role_name)));
}
}

View File

@@ -173,4 +173,8 @@ extern void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags);
extern TimeLineID GetWALInsertionTimeLine(void);
#endif
/* format codes not present in PG17 and below, but available in PG18+ */
#define INT64_HEX_FORMAT "%" INT64_MODIFIER "x"
#define UINT64_HEX_FORMAT "%" INT64_MODIFIER "x"
#endif /* NEON_PGVERSIONCOMPAT_H */

View File

@@ -377,6 +377,16 @@ typedef struct PageserverFeedback
} PageserverFeedback;
/* BEGIN_HADRON */
/**
* WAL proposer is the only backend that will update `sent_bytes` and `last_recorded_time_us`.
* Once the `sent_bytes` reaches the limit, it puts backpressure on PG backends.
*
* A PG backend checks `should_limit` to see if it should hit backpressure.
* - If yes, it also checks the `last_recorded_time_us` to see
* if it's time to push more WALs. This is because the WAL proposer
* only resets `should_limit` to 0 after it is notified about new WALs
* which might take a while.
*/
typedef struct WalRateLimiter
{
/* If the value is 1, PG backends will hit backpressure. */
@@ -384,7 +394,7 @@ typedef struct WalRateLimiter
/* The number of bytes sent in the current second. */
uint64 sent_bytes;
/* The last recorded time in microsecond. */
TimestampTz last_recorded_time_us;
pg_atomic_uint64 last_recorded_time_us;
} WalRateLimiter;
/* END_HADRON */

View File

@@ -449,8 +449,20 @@ backpressure_lag_impl(void)
}
state = GetWalpropShmemState();
if (state != NULL && pg_atomic_read_u32(&state->wal_rate_limiter.should_limit) == 1)
if (state != NULL && !!pg_atomic_read_u32(&state->wal_rate_limiter.should_limit))
{
TimestampTz now = GetCurrentTimestamp();
struct WalRateLimiter *limiter = &state->wal_rate_limiter;
uint64 last_recorded_time = pg_atomic_read_u64(&limiter->last_recorded_time_us);
if (now - last_recorded_time > USECS_PER_SEC)
{
/*
* More than 1 second has passed since the last recorded time, so it's time to push more WALs.
* If the backends are pushing WALs too fast, the wal proposer will rate limit them again.
*/
uint32 expected = true;
pg_atomic_compare_exchange_u32(&state->wal_rate_limiter.should_limit, &expected, false);
}
return 1;
}
/* END_HADRON */
@@ -502,6 +514,7 @@ WalproposerShmemInit(void)
pg_atomic_init_u64(&walprop_shared->currentClusterSize, 0);
/* BEGIN_HADRON */
pg_atomic_init_u32(&walprop_shared->wal_rate_limiter.should_limit, 0);
pg_atomic_init_u64(&walprop_shared->wal_rate_limiter.last_recorded_time_us, 0);
/* END_HADRON */
}
LWLockRelease(AddinShmemInitLock);
@@ -520,6 +533,7 @@ WalproposerShmemInit_SyncSafekeeper(void)
pg_atomic_init_u64(&walprop_shared->backpressureThrottlingTime, 0);
/* BEGIN_HADRON */
pg_atomic_init_u32(&walprop_shared->wal_rate_limiter.should_limit, 0);
pg_atomic_init_u64(&walprop_shared->wal_rate_limiter.last_recorded_time_us, 0);
/* END_HADRON */
}
@@ -1551,18 +1565,18 @@ XLogBroadcastWalProposer(WalProposer *wp)
{
uint64 max_wal_bytes = (uint64) databricks_max_wal_mb_per_second * 1024 * 1024;
struct WalRateLimiter *limiter = &state->wal_rate_limiter;
if (now - limiter->last_recorded_time_us > USECS_PER_SEC)
uint64 last_recorded_time = pg_atomic_read_u64(&limiter->last_recorded_time_us);
if (now - last_recorded_time > USECS_PER_SEC)
{
/* Reset the rate limiter */
limiter->last_recorded_time_us = now;
limiter->sent_bytes = 0;
pg_atomic_exchange_u32(&limiter->should_limit, 0);
pg_atomic_write_u64(&limiter->last_recorded_time_us, now);
pg_atomic_write_u32(&limiter->should_limit, false);
}
limiter->sent_bytes += (endptr - startptr);
if (limiter->sent_bytes > max_wal_bytes)
{
pg_atomic_exchange_u32(&limiter->should_limit, 1);
pg_atomic_write_u32(&limiter->should_limit, true);
}
}
/* END_HADRON */

poetry.lock generated
View File

@@ -2,127 +2,123 @@
[[package]]
name = "aiohappyeyeballs"
version = "2.3.5"
version = "2.6.1"
description = "Happy Eyeballs for asyncio"
optional = false
python-versions = ">=3.8"
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "aiohappyeyeballs-2.3.5-py3-none-any.whl", hash = "sha256:4d6dea59215537dbc746e93e779caea8178c866856a721c9c660d7a5a7b8be03"},
{file = "aiohappyeyeballs-2.3.5.tar.gz", hash = "sha256:6fa48b9f1317254f122a07a131a86b71ca6946ca989ce6326fff54a99a920105"},
{file = "aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8"},
{file = "aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558"},
]
[[package]]
name = "aiohttp"
version = "3.10.11"
version = "3.12.14"
description = "Async http client/server framework (asyncio)"
optional = false
python-versions = ">=3.8"
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5077b1a5f40ffa3ba1f40d537d3bec4383988ee51fbba6b74aa8fb1bc466599e"},
{file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8d6a14a4d93b5b3c2891fca94fa9d41b2322a68194422bef0dd5ec1e57d7d298"},
{file = "aiohttp-3.10.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ffbfde2443696345e23a3c597049b1dd43049bb65337837574205e7368472177"},
{file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20b3d9e416774d41813bc02fdc0663379c01817b0874b932b81c7f777f67b217"},
{file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b943011b45ee6bf74b22245c6faab736363678e910504dd7531a58c76c9015a"},
{file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48bc1d924490f0d0b3658fe5c4b081a4d56ebb58af80a6729d4bd13ea569797a"},
{file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e12eb3f4b1f72aaaf6acd27d045753b18101524f72ae071ae1c91c1cd44ef115"},
{file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f14ebc419a568c2eff3c1ed35f634435c24ead2fe19c07426af41e7adb68713a"},
{file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:72b191cdf35a518bfc7ca87d770d30941decc5aaf897ec8b484eb5cc8c7706f3"},
{file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5ab2328a61fdc86424ee540d0aeb8b73bbcad7351fb7cf7a6546fc0bcffa0038"},
{file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:aa93063d4af05c49276cf14e419550a3f45258b6b9d1f16403e777f1addf4519"},
{file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:30283f9d0ce420363c24c5c2421e71a738a2155f10adbb1a11a4d4d6d2715cfc"},
{file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e5358addc8044ee49143c546d2182c15b4ac3a60be01c3209374ace05af5733d"},
{file = "aiohttp-3.10.11-cp310-cp310-win32.whl", hash = "sha256:e1ffa713d3ea7cdcd4aea9cddccab41edf6882fa9552940344c44e59652e1120"},
{file = "aiohttp-3.10.11-cp310-cp310-win_amd64.whl", hash = "sha256:778cbd01f18ff78b5dd23c77eb82987ee4ba23408cbed233009fd570dda7e674"},
{file = "aiohttp-3.10.11-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:80ff08556c7f59a7972b1e8919f62e9c069c33566a6d28586771711e0eea4f07"},
{file = "aiohttp-3.10.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c8f96e9ee19f04c4914e4e7a42a60861066d3e1abf05c726f38d9d0a466e695"},
{file = "aiohttp-3.10.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fb8601394d537da9221947b5d6e62b064c9a43e88a1ecd7414d21a1a6fba9c24"},
{file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ea224cf7bc2d8856d6971cea73b1d50c9c51d36971faf1abc169a0d5f85a382"},
{file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db9503f79e12d5d80b3efd4d01312853565c05367493379df76d2674af881caa"},
{file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0f449a50cc33f0384f633894d8d3cd020e3ccef81879c6e6245c3c375c448625"},
{file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82052be3e6d9e0c123499127782a01a2b224b8af8c62ab46b3f6197035ad94e9"},
{file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:20063c7acf1eec550c8eb098deb5ed9e1bb0521613b03bb93644b810986027ac"},
{file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:489cced07a4c11488f47aab1f00d0c572506883f877af100a38f1fedaa884c3a"},
{file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ea9b3bab329aeaa603ed3bf605f1e2a6f36496ad7e0e1aa42025f368ee2dc07b"},
{file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:ca117819d8ad113413016cb29774b3f6d99ad23c220069789fc050267b786c16"},
{file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:2dfb612dcbe70fb7cdcf3499e8d483079b89749c857a8f6e80263b021745c730"},
{file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f9b615d3da0d60e7d53c62e22b4fd1c70f4ae5993a44687b011ea3a2e49051b8"},
{file = "aiohttp-3.10.11-cp311-cp311-win32.whl", hash = "sha256:29103f9099b6068bbdf44d6a3d090e0a0b2be6d3c9f16a070dd9d0d910ec08f9"},
{file = "aiohttp-3.10.11-cp311-cp311-win_amd64.whl", hash = "sha256:236b28ceb79532da85d59aa9b9bf873b364e27a0acb2ceaba475dc61cffb6f3f"},
{file = "aiohttp-3.10.11-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:7480519f70e32bfb101d71fb9a1f330fbd291655a4c1c922232a48c458c52710"},
{file = "aiohttp-3.10.11-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f65267266c9aeb2287a6622ee2bb39490292552f9fbf851baabc04c9f84e048d"},
{file = "aiohttp-3.10.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7400a93d629a0608dc1d6c55f1e3d6e07f7375745aaa8bd7f085571e4d1cee97"},
{file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f34b97e4b11b8d4eb2c3a4f975be626cc8af99ff479da7de49ac2c6d02d35725"},
{file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e7b825da878464a252ccff2958838f9caa82f32a8dbc334eb9b34a026e2c636"},
{file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9f92a344c50b9667827da308473005f34767b6a2a60d9acff56ae94f895f385"},
{file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc6f1ab987a27b83c5268a17218463c2ec08dbb754195113867a27b166cd6087"},
{file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1dc0f4ca54842173d03322793ebcf2c8cc2d34ae91cc762478e295d8e361e03f"},
{file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7ce6a51469bfaacff146e59e7fb61c9c23006495d11cc24c514a455032bcfa03"},
{file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:aad3cd91d484d065ede16f3cf15408254e2469e3f613b241a1db552c5eb7ab7d"},
{file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f4df4b8ca97f658c880fb4b90b1d1ec528315d4030af1ec763247ebfd33d8b9a"},
{file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:2e4e18a0a2d03531edbc06c366954e40a3f8d2a88d2b936bbe78a0c75a3aab3e"},
{file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6ce66780fa1a20e45bc753cda2a149daa6dbf1561fc1289fa0c308391c7bc0a4"},
{file = "aiohttp-3.10.11-cp312-cp312-win32.whl", hash = "sha256:a919c8957695ea4c0e7a3e8d16494e3477b86f33067478f43106921c2fef15bb"},
{file = "aiohttp-3.10.11-cp312-cp312-win_amd64.whl", hash = "sha256:b5e29706e6389a2283a91611c91bf24f218962717c8f3b4e528ef529d112ee27"},
{file = "aiohttp-3.10.11-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:703938e22434d7d14ec22f9f310559331f455018389222eed132808cd8f44127"},
{file = "aiohttp-3.10.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9bc50b63648840854e00084c2b43035a62e033cb9b06d8c22b409d56eb098413"},
{file = "aiohttp-3.10.11-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5f0463bf8b0754bc744e1feb61590706823795041e63edf30118a6f0bf577461"},
{file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6c6dec398ac5a87cb3a407b068e1106b20ef001c344e34154616183fe684288"},
{file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bcaf2d79104d53d4dcf934f7ce76d3d155302d07dae24dff6c9fffd217568067"},
{file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:25fd5470922091b5a9aeeb7e75be609e16b4fba81cdeaf12981393fb240dd10e"},
{file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbde2ca67230923a42161b1f408c3992ae6e0be782dca0c44cb3206bf330dee1"},
{file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:249c8ff8d26a8b41a0f12f9df804e7c685ca35a207e2410adbd3e924217b9006"},
{file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:878ca6a931ee8c486a8f7b432b65431d095c522cbeb34892bee5be97b3481d0f"},
{file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:8663f7777ce775f0413324be0d96d9730959b2ca73d9b7e2c2c90539139cbdd6"},
{file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:6cd3f10b01f0c31481fba8d302b61603a2acb37b9d30e1d14e0f5a58b7b18a31"},
{file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:4e8d8aad9402d3aa02fdc5ca2fe68bcb9fdfe1f77b40b10410a94c7f408b664d"},
{file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:38e3c4f80196b4f6c3a85d134a534a56f52da9cb8d8e7af1b79a32eefee73a00"},
{file = "aiohttp-3.10.11-cp313-cp313-win32.whl", hash = "sha256:fc31820cfc3b2863c6e95e14fcf815dc7afe52480b4dc03393c4873bb5599f71"},
{file = "aiohttp-3.10.11-cp313-cp313-win_amd64.whl", hash = "sha256:4996ff1345704ffdd6d75fb06ed175938c133425af616142e7187f28dc75f14e"},
{file = "aiohttp-3.10.11-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:74baf1a7d948b3d640badeac333af581a367ab916b37e44cf90a0334157cdfd2"},
{file = "aiohttp-3.10.11-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:473aebc3b871646e1940c05268d451f2543a1d209f47035b594b9d4e91ce8339"},
{file = "aiohttp-3.10.11-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c2f746a6968c54ab2186574e15c3f14f3e7f67aef12b761e043b33b89c5b5f95"},
{file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d110cabad8360ffa0dec8f6ec60e43286e9d251e77db4763a87dcfe55b4adb92"},
{file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0099c7d5d7afff4202a0c670e5b723f7718810000b4abcbc96b064129e64bc7"},
{file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0316e624b754dbbf8c872b62fe6dcb395ef20c70e59890dfa0de9eafccd2849d"},
{file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a5f7ab8baf13314e6b2485965cbacb94afff1e93466ac4d06a47a81c50f9cca"},
{file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c891011e76041e6508cbfc469dd1a8ea09bc24e87e4c204e05f150c4c455a5fa"},
{file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:9208299251370ee815473270c52cd3f7069ee9ed348d941d574d1457d2c73e8b"},
{file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:459f0f32c8356e8125f45eeff0ecf2b1cb6db1551304972702f34cd9e6c44658"},
{file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:14cdc8c1810bbd4b4b9f142eeee23cda528ae4e57ea0923551a9af4820980e39"},
{file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:971aa438a29701d4b34e4943e91b5e984c3ae6ccbf80dd9efaffb01bd0b243a9"},
{file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:9a309c5de392dfe0f32ee57fa43ed8fc6ddf9985425e84bd51ed66bb16bce3a7"},
{file = "aiohttp-3.10.11-cp38-cp38-win32.whl", hash = "sha256:9ec1628180241d906a0840b38f162a3215114b14541f1a8711c368a8739a9be4"},
{file = "aiohttp-3.10.11-cp38-cp38-win_amd64.whl", hash = "sha256:9c6e0ffd52c929f985c7258f83185d17c76d4275ad22e90aa29f38e211aacbec"},
{file = "aiohttp-3.10.11-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cdc493a2e5d8dc79b2df5bec9558425bcd39aff59fc949810cbd0832e294b106"},
{file = "aiohttp-3.10.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b3e70f24e7d0405be2348da9d5a7836936bf3a9b4fd210f8c37e8d48bc32eca6"},
{file = "aiohttp-3.10.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:968b8fb2a5eee2770eda9c7b5581587ef9b96fbdf8dcabc6b446d35ccc69df01"},
{file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:deef4362af9493d1382ef86732ee2e4cbc0d7c005947bd54ad1a9a16dd59298e"},
{file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:686b03196976e327412a1b094f4120778c7c4b9cff9bce8d2fdfeca386b89829"},
{file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3bf6d027d9d1d34e1c2e1645f18a6498c98d634f8e373395221121f1c258ace8"},
{file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:099fd126bf960f96d34a760e747a629c27fb3634da5d05c7ef4d35ef4ea519fc"},
{file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c73c4d3dae0b4644bc21e3de546530531d6cdc88659cdeb6579cd627d3c206aa"},
{file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0c5580f3c51eea91559db3facd45d72e7ec970b04528b4709b1f9c2555bd6d0b"},
{file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:fdf6429f0caabfd8a30c4e2eaecb547b3c340e4730ebfe25139779b9815ba138"},
{file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:d97187de3c276263db3564bb9d9fad9e15b51ea10a371ffa5947a5ba93ad6777"},
{file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:0acafb350cfb2eba70eb5d271f55e08bd4502ec35e964e18ad3e7d34d71f7261"},
{file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c13ed0c779911c7998a58e7848954bd4d63df3e3575f591e321b19a2aec8df9f"},
{file = "aiohttp-3.10.11-cp39-cp39-win32.whl", hash = "sha256:22b7c540c55909140f63ab4f54ec2c20d2635c0289cdd8006da46f3327f971b9"},
{file = "aiohttp-3.10.11-cp39-cp39-win_amd64.whl", hash = "sha256:7b26b1551e481012575dab8e3727b16fe7dd27eb2711d2e63ced7368756268fb"},
{file = "aiohttp-3.10.11.tar.gz", hash = "sha256:9dc2b8f3dcab2e39e0fa309c8da50c3b55e6f34ab25f1a71d3288f24924d33a7"},
{file = "aiohttp-3.12.14-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:906d5075b5ba0dd1c66fcaaf60eb09926a9fef3ca92d912d2a0bbdbecf8b1248"},
{file = "aiohttp-3.12.14-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c875bf6fc2fd1a572aba0e02ef4e7a63694778c5646cdbda346ee24e630d30fb"},
{file = "aiohttp-3.12.14-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fbb284d15c6a45fab030740049d03c0ecd60edad9cd23b211d7e11d3be8d56fd"},
{file = "aiohttp-3.12.14-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38e360381e02e1a05d36b223ecab7bc4a6e7b5ab15760022dc92589ee1d4238c"},
{file = "aiohttp-3.12.14-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:aaf90137b5e5d84a53632ad95ebee5c9e3e7468f0aab92ba3f608adcb914fa95"},
{file = "aiohttp-3.12.14-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e532a25e4a0a2685fa295a31acf65e027fbe2bea7a4b02cdfbbba8a064577663"},
{file = "aiohttp-3.12.14-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eab9762c4d1b08ae04a6c77474e6136da722e34fdc0e6d6eab5ee93ac29f35d1"},
{file = "aiohttp-3.12.14-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abe53c3812b2899889a7fca763cdfaeee725f5be68ea89905e4275476ffd7e61"},
{file = "aiohttp-3.12.14-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5760909b7080aa2ec1d320baee90d03b21745573780a072b66ce633eb77a8656"},
{file = "aiohttp-3.12.14-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:02fcd3f69051467bbaa7f84d7ec3267478c7df18d68b2e28279116e29d18d4f3"},
{file = "aiohttp-3.12.14-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:4dcd1172cd6794884c33e504d3da3c35648b8be9bfa946942d353b939d5f1288"},
{file = "aiohttp-3.12.14-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:224d0da41355b942b43ad08101b1b41ce633a654128ee07e36d75133443adcda"},
{file = "aiohttp-3.12.14-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:e387668724f4d734e865c1776d841ed75b300ee61059aca0b05bce67061dcacc"},
{file = "aiohttp-3.12.14-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:dec9cde5b5a24171e0b0a4ca064b1414950904053fb77c707efd876a2da525d8"},
{file = "aiohttp-3.12.14-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:bbad68a2af4877cc103cd94af9160e45676fc6f0c14abb88e6e092b945c2c8e3"},
{file = "aiohttp-3.12.14-cp310-cp310-win32.whl", hash = "sha256:ee580cb7c00bd857b3039ebca03c4448e84700dc1322f860cf7a500a6f62630c"},
{file = "aiohttp-3.12.14-cp310-cp310-win_amd64.whl", hash = "sha256:cf4f05b8cea571e2ccc3ca744e35ead24992d90a72ca2cf7ab7a2efbac6716db"},
{file = "aiohttp-3.12.14-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f4552ff7b18bcec18b60a90c6982049cdb9dac1dba48cf00b97934a06ce2e597"},
{file = "aiohttp-3.12.14-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8283f42181ff6ccbcf25acaae4e8ab2ff7e92b3ca4a4ced73b2c12d8cd971393"},
{file = "aiohttp-3.12.14-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:040afa180ea514495aaff7ad34ec3d27826eaa5d19812730fe9e529b04bb2179"},
{file = "aiohttp-3.12.14-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b413c12f14c1149f0ffd890f4141a7471ba4b41234fe4fd4a0ff82b1dc299dbb"},
{file = "aiohttp-3.12.14-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:1d6f607ce2e1a93315414e3d448b831238f1874b9968e1195b06efaa5c87e245"},
{file = "aiohttp-3.12.14-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:565e70d03e924333004ed101599902bba09ebb14843c8ea39d657f037115201b"},
{file = "aiohttp-3.12.14-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4699979560728b168d5ab63c668a093c9570af2c7a78ea24ca5212c6cdc2b641"},
{file = "aiohttp-3.12.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad5fdf6af93ec6c99bf800eba3af9a43d8bfd66dce920ac905c817ef4a712afe"},
{file = "aiohttp-3.12.14-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4ac76627c0b7ee0e80e871bde0d376a057916cb008a8f3ffc889570a838f5cc7"},
{file = "aiohttp-3.12.14-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:798204af1180885651b77bf03adc903743a86a39c7392c472891649610844635"},
{file = "aiohttp-3.12.14-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:4f1205f97de92c37dd71cf2d5bcfb65fdaed3c255d246172cce729a8d849b4da"},
{file = "aiohttp-3.12.14-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:76ae6f1dd041f85065d9df77c6bc9c9703da9b5c018479d20262acc3df97d419"},
{file = "aiohttp-3.12.14-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:a194ace7bc43ce765338ca2dfb5661489317db216ea7ea700b0332878b392cab"},
{file = "aiohttp-3.12.14-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:16260e8e03744a6fe3fcb05259eeab8e08342c4c33decf96a9dad9f1187275d0"},
{file = "aiohttp-3.12.14-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:8c779e5ebbf0e2e15334ea404fcce54009dc069210164a244d2eac8352a44b28"},
{file = "aiohttp-3.12.14-cp311-cp311-win32.whl", hash = "sha256:a289f50bf1bd5be227376c067927f78079a7bdeccf8daa6a9e65c38bae14324b"},
{file = "aiohttp-3.12.14-cp311-cp311-win_amd64.whl", hash = "sha256:0b8a69acaf06b17e9c54151a6c956339cf46db4ff72b3ac28516d0f7068f4ced"},
{file = "aiohttp-3.12.14-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a0ecbb32fc3e69bc25efcda7d28d38e987d007096cbbeed04f14a6662d0eee22"},
{file = "aiohttp-3.12.14-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0400f0ca9bb3e0b02f6466421f253797f6384e9845820c8b05e976398ac1d81a"},
{file = "aiohttp-3.12.14-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a56809fed4c8a830b5cae18454b7464e1529dbf66f71c4772e3cfa9cbec0a1ff"},
{file = "aiohttp-3.12.14-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27f2e373276e4755691a963e5d11756d093e346119f0627c2d6518208483fb6d"},
{file = "aiohttp-3.12.14-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:ca39e433630e9a16281125ef57ece6817afd1d54c9f1bf32e901f38f16035869"},
{file = "aiohttp-3.12.14-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9c748b3f8b14c77720132b2510a7d9907a03c20ba80f469e58d5dfd90c079a1c"},
{file = "aiohttp-3.12.14-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f0a568abe1b15ce69d4cc37e23020720423f0728e3cb1f9bcd3f53420ec3bfe7"},
{file = "aiohttp-3.12.14-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9888e60c2c54eaf56704b17feb558c7ed6b7439bca1e07d4818ab878f2083660"},
{file = "aiohttp-3.12.14-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3006a1dc579b9156de01e7916d38c63dc1ea0679b14627a37edf6151bc530088"},
{file = "aiohttp-3.12.14-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:aa8ec5c15ab80e5501a26719eb48a55f3c567da45c6ea5bb78c52c036b2655c7"},
{file = "aiohttp-3.12.14-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:39b94e50959aa07844c7fe2206b9f75d63cc3ad1c648aaa755aa257f6f2498a9"},
{file = "aiohttp-3.12.14-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:04c11907492f416dad9885d503fbfc5dcb6768d90cad8639a771922d584609d3"},
{file = "aiohttp-3.12.14-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:88167bd9ab69bb46cee91bd9761db6dfd45b6e76a0438c7e884c3f8160ff21eb"},
{file = "aiohttp-3.12.14-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:791504763f25e8f9f251e4688195e8b455f8820274320204f7eafc467e609425"},
{file = "aiohttp-3.12.14-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2785b112346e435dd3a1a67f67713a3fe692d288542f1347ad255683f066d8e0"},
{file = "aiohttp-3.12.14-cp312-cp312-win32.whl", hash = "sha256:15f5f4792c9c999a31d8decf444e79fcfd98497bf98e94284bf390a7bb8c1729"},
{file = "aiohttp-3.12.14-cp312-cp312-win_amd64.whl", hash = "sha256:3b66e1a182879f579b105a80d5c4bd448b91a57e8933564bf41665064796a338"},
{file = "aiohttp-3.12.14-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:3143a7893d94dc82bc409f7308bc10d60285a3cd831a68faf1aa0836c5c3c767"},
{file = "aiohttp-3.12.14-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3d62ac3d506cef54b355bd34c2a7c230eb693880001dfcda0bf88b38f5d7af7e"},
{file = "aiohttp-3.12.14-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:48e43e075c6a438937c4de48ec30fa8ad8e6dfef122a038847456bfe7b947b63"},
{file = "aiohttp-3.12.14-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:077b4488411a9724cecc436cbc8c133e0d61e694995b8de51aaf351c7578949d"},
{file = "aiohttp-3.12.14-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d8c35632575653f297dcbc9546305b2c1133391089ab925a6a3706dfa775ccab"},
{file = "aiohttp-3.12.14-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6b8ce87963f0035c6834b28f061df90cf525ff7c9b6283a8ac23acee6502afd4"},
{file = "aiohttp-3.12.14-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f0a2cf66e32a2563bb0766eb24eae7e9a269ac0dc48db0aae90b575dc9583026"},
{file = "aiohttp-3.12.14-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdea089caf6d5cde975084a884c72d901e36ef9c2fd972c9f51efbbc64e96fbd"},
{file = "aiohttp-3.12.14-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8a7865f27db67d49e81d463da64a59365ebd6b826e0e4847aa111056dcb9dc88"},
{file = "aiohttp-3.12.14-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0ab5b38a6a39781d77713ad930cb5e7feea6f253de656a5f9f281a8f5931b086"},
{file = "aiohttp-3.12.14-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:9b3b15acee5c17e8848d90a4ebc27853f37077ba6aec4d8cb4dbbea56d156933"},
{file = "aiohttp-3.12.14-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e4c972b0bdaac167c1e53e16a16101b17c6d0ed7eac178e653a07b9f7fad7151"},
{file = "aiohttp-3.12.14-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:7442488b0039257a3bdbc55f7209587911f143fca11df9869578db6c26feeeb8"},
{file = "aiohttp-3.12.14-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:f68d3067eecb64c5e9bab4a26aa11bd676f4c70eea9ef6536b0a4e490639add3"},
{file = "aiohttp-3.12.14-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f88d3704c8b3d598a08ad17d06006cb1ca52a1182291f04979e305c8be6c9758"},
{file = "aiohttp-3.12.14-cp313-cp313-win32.whl", hash = "sha256:a3c99ab19c7bf375c4ae3debd91ca5d394b98b6089a03231d4c580ef3c2ae4c5"},
{file = "aiohttp-3.12.14-cp313-cp313-win_amd64.whl", hash = "sha256:3f8aad695e12edc9d571f878c62bedc91adf30c760c8632f09663e5f564f4baa"},
{file = "aiohttp-3.12.14-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b8cc6b05e94d837bcd71c6531e2344e1ff0fb87abe4ad78a9261d67ef5d83eae"},
{file = "aiohttp-3.12.14-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d1dcb015ac6a3b8facd3677597edd5ff39d11d937456702f0bb2b762e390a21b"},
{file = "aiohttp-3.12.14-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3779ed96105cd70ee5e85ca4f457adbce3d9ff33ec3d0ebcdf6c5727f26b21b3"},
{file = "aiohttp-3.12.14-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:717a0680729b4ebd7569c1dcd718c46b09b360745fd8eb12317abc74b14d14d0"},
{file = "aiohttp-3.12.14-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b5dd3a2ef7c7e968dbbac8f5574ebeac4d2b813b247e8cec28174a2ba3627170"},
{file = "aiohttp-3.12.14-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4710f77598c0092239bc12c1fcc278a444e16c7032d91babf5abbf7166463f7b"},
{file = "aiohttp-3.12.14-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f3e9f75ae842a6c22a195d4a127263dbf87cbab729829e0bd7857fb1672400b2"},
{file = "aiohttp-3.12.14-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f9c8d55d6802086edd188e3a7d85a77787e50d56ce3eb4757a3205fa4657922"},
{file = "aiohttp-3.12.14-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:79b29053ff3ad307880d94562cca80693c62062a098a5776ea8ef5ef4b28d140"},
{file = "aiohttp-3.12.14-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:23e1332fff36bebd3183db0c7a547a1da9d3b4091509f6d818e098855f2f27d3"},
{file = "aiohttp-3.12.14-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:a564188ce831fd110ea76bcc97085dd6c625b427db3f1dbb14ca4baa1447dcbc"},
{file = "aiohttp-3.12.14-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:a7a1b4302f70bb3ec40ca86de82def532c97a80db49cac6a6700af0de41af5ee"},
{file = "aiohttp-3.12.14-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:1b07ccef62950a2519f9bfc1e5b294de5dd84329f444ca0b329605ea787a3de5"},
{file = "aiohttp-3.12.14-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:938bd3ca6259e7e48b38d84f753d548bd863e0c222ed6ee6ace3fd6752768a84"},
{file = "aiohttp-3.12.14-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:8bc784302b6b9f163b54c4e93d7a6f09563bd01ff2b841b29ed3ac126e5040bf"},
{file = "aiohttp-3.12.14-cp39-cp39-win32.whl", hash = "sha256:a3416f95961dd7d5393ecff99e3f41dc990fb72eda86c11f2a60308ac6dcd7a0"},
{file = "aiohttp-3.12.14-cp39-cp39-win_amd64.whl", hash = "sha256:196858b8820d7f60578f8b47e5669b3195c21d8ab261e39b1d705346458f445f"},
{file = "aiohttp-3.12.14.tar.gz", hash = "sha256:6e06e120e34d93100de448fd941522e11dafa78ef1a893c179901b7d66aa29f2"},
]
[package.dependencies]
aiohappyeyeballs = ">=2.3.0"
aiosignal = ">=1.1.2"
aiohappyeyeballs = ">=2.5.0"
aiosignal = ">=1.4.0"
attrs = ">=17.3.0"
frozenlist = ">=1.1.1"
multidict = ">=4.5,<7.0"
yarl = ">=1.12.0,<2.0"
propcache = ">=0.2.0"
yarl = ">=1.17.0,<2.0"
[package.extras]
speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns (>=3.2.0) ; sys_platform == \"linux\" or sys_platform == \"darwin\"", "brotlicffi ; platform_python_implementation != \"CPython\""]
speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns (>=3.3.0)", "brotlicffi ; platform_python_implementation != \"CPython\""]
[[package]]
name = "aiopg"
@@ -145,18 +141,19 @@ sa = ["sqlalchemy[postgresql-psycopg2binary] (>=1.3,<1.5)"]
[[package]]
name = "aiosignal"
version = "1.3.1"
version = "1.4.0"
description = "aiosignal: a list of registered asynchronous callbacks"
optional = false
python-versions = ">=3.7"
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"},
{file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"},
{file = "aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e"},
{file = "aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7"},
]
[package.dependencies]
frozenlist = ">=1.1.0"
typing-extensions = {version = ">=4.2", markers = "python_version < \"3.13\""}
[[package]]
name = "allure-pytest"
@@ -3847,4 +3844,4 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "2.1"
python-versions = "^3.11"
content-hash = "bd93313f110110aa53b24a3ed47ba2d7f60e2c658a79cdff7320fed1bb1b57b5"
content-hash = "6a1e8ba06b8194bf28d87fd5e184e2ddc2b4a19dffcbe3953b26da3d55c9212f"

View File

@@ -1,13 +1,11 @@
use std::collections::{HashMap, HashSet, hash_map};
use std::convert::Infallible;
use std::sync::atomic::AtomicU64;
use std::time::Duration;
use async_trait::async_trait;
use clashmap::ClashMap;
use clashmap::mapref::one::Ref;
use rand::{Rng, thread_rng};
use tokio::sync::Mutex;
use tokio::time::Instant;
use tracing::{debug, info};
@@ -22,31 +20,23 @@ pub(crate) trait ProjectInfoCache {
fn invalidate_endpoint_access_for_project(&self, project_id: ProjectIdInt);
fn invalidate_endpoint_access_for_org(&self, account_id: AccountIdInt);
fn invalidate_role_secret_for_project(&self, project_id: ProjectIdInt, role_name: RoleNameInt);
async fn decrement_active_listeners(&self);
async fn increment_active_listeners(&self);
}
struct Entry<T> {
created_at: Instant,
expires_at: Instant,
value: T,
}
impl<T> Entry<T> {
pub(crate) fn new(value: T) -> Self {
pub(crate) fn new(value: T, ttl: Duration) -> Self {
Self {
created_at: Instant::now(),
expires_at: Instant::now() + ttl,
value,
}
}
pub(crate) fn get(&self, valid_since: Instant) -> Option<&T> {
(valid_since < self.created_at).then_some(&self.value)
}
}
impl<T> From<T> for Entry<T> {
fn from(value: T) -> Self {
Self::new(value)
pub(crate) fn get(&self) -> Option<&T> {
(self.expires_at > Instant::now()).then_some(&self.value)
}
}
@@ -56,18 +46,12 @@ struct EndpointInfo {
}
impl EndpointInfo {
pub(crate) fn get_role_secret(
&self,
role_name: RoleNameInt,
valid_since: Instant,
) -> Option<RoleAccessControl> {
let controls = self.role_controls.get(&role_name)?;
controls.get(valid_since).cloned()
pub(crate) fn get_role_secret(&self, role_name: RoleNameInt) -> Option<RoleAccessControl> {
self.role_controls.get(&role_name)?.get().cloned()
}
pub(crate) fn get_controls(&self, valid_since: Instant) -> Option<EndpointAccessControl> {
let controls = self.controls.as_ref()?;
controls.get(valid_since).cloned()
pub(crate) fn get_controls(&self) -> Option<EndpointAccessControl> {
self.controls.as_ref()?.get().cloned()
}
pub(crate) fn invalidate_endpoint(&mut self) {
@@ -92,11 +76,8 @@ pub struct ProjectInfoCacheImpl {
project2ep: ClashMap<ProjectIdInt, HashSet<EndpointIdInt>>,
// FIXME(stefan): we need a way to GC the account2ep map.
account2ep: ClashMap<AccountIdInt, HashSet<EndpointIdInt>>,
config: ProjectInfoCacheOptions,
start_time: Instant,
ttl_disabled_since_us: AtomicU64,
active_listeners_lock: Mutex<usize>,
config: ProjectInfoCacheOptions,
}
#[async_trait]
@@ -152,29 +133,6 @@ impl ProjectInfoCache for ProjectInfoCacheImpl {
}
}
}
async fn decrement_active_listeners(&self) {
let mut listeners_guard = self.active_listeners_lock.lock().await;
if *listeners_guard == 0 {
tracing::error!("active_listeners count is already 0, something is broken");
return;
}
*listeners_guard -= 1;
if *listeners_guard == 0 {
self.ttl_disabled_since_us
.store(u64::MAX, std::sync::atomic::Ordering::SeqCst);
}
}
async fn increment_active_listeners(&self) {
let mut listeners_guard = self.active_listeners_lock.lock().await;
*listeners_guard += 1;
if *listeners_guard == 1 {
let new_ttl = (self.start_time.elapsed() + self.config.ttl).as_micros() as u64;
self.ttl_disabled_since_us
.store(new_ttl, std::sync::atomic::Ordering::SeqCst);
}
}
}
impl ProjectInfoCacheImpl {
@@ -184,9 +142,6 @@ impl ProjectInfoCacheImpl {
project2ep: ClashMap::new(),
account2ep: ClashMap::new(),
config,
ttl_disabled_since_us: AtomicU64::new(u64::MAX),
start_time: Instant::now(),
active_listeners_lock: Mutex::new(0),
}
}
@@ -203,19 +158,17 @@ impl ProjectInfoCacheImpl {
endpoint_id: &EndpointId,
role_name: &RoleName,
) -> Option<RoleAccessControl> {
let valid_since = self.get_cache_times();
let role_name = RoleNameInt::get(role_name)?;
let endpoint_info = self.get_endpoint_cache(endpoint_id)?;
endpoint_info.get_role_secret(role_name, valid_since)
endpoint_info.get_role_secret(role_name)
}
pub(crate) fn get_endpoint_access(
&self,
endpoint_id: &EndpointId,
) -> Option<EndpointAccessControl> {
let valid_since = self.get_cache_times();
let endpoint_info = self.get_endpoint_cache(endpoint_id)?;
endpoint_info.get_controls(valid_since)
endpoint_info.get_controls()
}
pub(crate) fn insert_endpoint_access(
@@ -237,8 +190,8 @@ impl ProjectInfoCacheImpl {
return;
}
let controls = Entry::from(controls);
let role_controls = Entry::from(role_controls);
let controls = Entry::new(controls, self.config.ttl);
let role_controls = Entry::new(role_controls, self.config.ttl);
match self.cache.entry(endpoint_id) {
clashmap::Entry::Vacant(e) => {
@@ -275,27 +228,6 @@ impl ProjectInfoCacheImpl {
}
}
fn ignore_ttl_since(&self) -> Option<Instant> {
let ttl_disabled_since_us = self
.ttl_disabled_since_us
.load(std::sync::atomic::Ordering::Relaxed);
if ttl_disabled_since_us == u64::MAX {
return None;
}
Some(self.start_time + Duration::from_micros(ttl_disabled_since_us))
}
fn get_cache_times(&self) -> Instant {
let mut valid_since = Instant::now() - self.config.ttl;
if let Some(ignore_ttl_since) = self.ignore_ttl_since() {
// We are fine if entry is not older than ttl or was added before we are getting notifications.
valid_since = valid_since.min(ignore_ttl_since);
}
valid_since
}
pub fn maybe_invalidate_role_secret(&self, endpoint_id: &EndpointId, role_name: &RoleName) {
let Some(endpoint_id) = EndpointIdInt::get(endpoint_id) else {
return;
@@ -313,16 +245,7 @@ impl ProjectInfoCacheImpl {
return;
};
let created_at = role_controls.get().created_at;
let expire = match self.ignore_ttl_since() {
// if ignoring TTL, we should still try and roll the password if it's old
// and the client gave an incorrect password. There could be some lag on the redis channel.
Some(_) => created_at + self.config.ttl < Instant::now(),
// edge case: redis is down, let's be generous and invalidate the cache immediately.
None => true,
};
if expire {
if role_controls.get().expires_at <= Instant::now() {
role_controls.remove();
}
}
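The rewritten `Entry` above bakes an absolute expiry time into each cache entry at insertion, instead of deriving a global `valid_since` cutoff on every read; that is what lets the listener-counting and `ttl_disabled_since_us` machinery go away. A minimal standalone sketch of that entry shape (illustrative only, not the proxy's exact types):

use std::time::{Duration, Instant};

/// A cache entry that records its own expiry time when it is inserted.
struct Entry<T> {
    expires_at: Instant,
    value: T,
}

impl<T> Entry<T> {
    fn new(value: T, ttl: Duration) -> Self {
        Self { expires_at: Instant::now() + ttl, value }
    }

    /// Returns the value only while the entry is still fresh.
    fn get(&self) -> Option<&T> {
        (self.expires_at > Instant::now()).then_some(&self.value)
    }
}

fn main() {
    let entry = Entry::new("role secret", Duration::from_millis(50));
    assert!(entry.get().is_some());
    std::thread::sleep(Duration::from_millis(60));
    assert!(entry.get().is_none());
}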

View File

@@ -14,8 +14,8 @@ use std::time::{Duration, Instant};
use hashlink::{LruCache, linked_hash_map::RawEntryMut};
use tracing::debug;
use super::Cache;
use super::common::Cached;
use super::{Cache, timed_lru};
/// An implementation of timed LRU cache with fixed capacity.
/// Key properties:
@@ -30,7 +30,7 @@ use super::{Cache, timed_lru};
///
/// * There's an API for immediate invalidation (removal) of a cache entry;
/// It's useful in case we know for sure that the entry is no longer correct.
/// See [`timed_lru::Cached`] for more information.
/// See [`Cached`] for more information.
///
/// * Expired entries are kept in the cache, until they are evicted by the LRU policy,
/// or by a successful lookup (i.e. the entry hasn't expired yet).
@@ -217,15 +217,18 @@ impl<K: Hash + Eq + Clone, V: Clone> TimedLru<K, V> {
}
impl<K: Hash + Eq, V: Clone> TimedLru<K, V> {
/// Retrieve a cached entry in convenient wrapper.
pub(crate) fn get<Q>(&self, key: &Q) -> Option<timed_lru::Cached<&Self>>
/// Retrieve a cached entry in convenient wrapper, alongside timing information.
pub(crate) fn get_with_created_at<Q>(
&self,
key: &Q,
) -> Option<Cached<&Self, (<Self as Cache>::Value, Instant)>>
where
K: Borrow<Q> + Clone,
Q: Hash + Eq + ?Sized,
{
self.get_raw(key, |key, entry| Cached {
token: Some((self, key.clone())),
value: entry.value.clone(),
value: (entry.value.clone(), entry.created_at),
})
}
}
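`get_with_created_at` hands the caller the entry's creation timestamp alongside the value, so the caller can tell how long the result has already sat in the cache. A rough sketch of that lookup shape over a plain map (illustrative names, not the actual `TimedLru` internals):

use std::collections::HashMap;
use std::time::Instant;

struct Entry<V> {
    value: V,
    created_at: Instant,
}

struct Cache<V> {
    map: HashMap<String, Entry<V>>,
}

impl<V: Clone> Cache<V> {
    /// Return the cached value together with the time it was inserted.
    fn get_with_created_at(&self, key: &str) -> Option<(V, Instant)> {
        self.map.get(key).map(|e| (e.value.clone(), e.created_at))
    }
}

fn main() {
    let mut cache = Cache { map: HashMap::new() };
    cache.map.insert(
        "endpoint-cache-key".to_string(),
        Entry { value: "node info", created_at: Instant::now() },
    );
    let (value, created_at) = cache.get_with_created_at("endpoint-cache-key").unwrap();
    println!("{value} was cached {:?} ago", created_at.elapsed());
}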

View File

@@ -23,12 +23,13 @@ use crate::control_plane::errors::{
ControlPlaneError, GetAuthInfoError, GetEndpointJwksError, WakeComputeError,
};
use crate::control_plane::locks::ApiLocks;
use crate::control_plane::messages::{ColdStartInfo, EndpointJwksResponse, Reason};
use crate::control_plane::messages::{ColdStartInfo, EndpointJwksResponse};
use crate::control_plane::{
AccessBlockerFlags, AuthInfo, AuthSecret, CachedNodeInfo, EndpointAccessControl, NodeInfo,
RoleAccessControl,
};
use crate::metrics::Metrics;
use crate::proxy::retry::CouldRetry;
use crate::rate_limiter::WakeComputeRateLimiter;
use crate::types::{EndpointCacheKey, EndpointId, RoleName};
use crate::{compute, http, scram};
@@ -382,16 +383,31 @@ impl super::ControlPlaneApi for NeonControlPlaneClient {
macro_rules! check_cache {
() => {
if let Some(cached) = self.caches.node_info.get(&key) {
let (cached, info) = cached.take_value();
let info = info.map_err(|c| {
info!(key = &*key, "found cached wake_compute error");
WakeComputeError::ControlPlane(ControlPlaneError::Message(Box::new(*c)))
})?;
if let Some(cached) = self.caches.node_info.get_with_created_at(&key) {
let (cached, (info, created_at)) = cached.take_value();
return match info {
Err(mut msg) => {
info!(key = &*key, "found cached wake_compute error");
debug!(key = &*key, "found cached compute node info");
ctx.set_project(info.aux.clone());
return Ok(cached.map(|()| info));
// if retry_delay_ms is set, reduce it by the amount of time it spent in cache
if let Some(status) = &mut msg.status {
if let Some(retry_info) = &mut status.details.retry_info {
retry_info.retry_delay_ms = retry_info
.retry_delay_ms
.saturating_sub(created_at.elapsed().as_millis() as u64)
}
}
Err(WakeComputeError::ControlPlane(ControlPlaneError::Message(
msg,
)))
}
Ok(info) => {
debug!(key = &*key, "found cached compute node info");
ctx.set_project(info.aux.clone());
Ok(cached.map(|()| info))
}
};
}
};
}
@@ -434,42 +450,29 @@ impl super::ControlPlaneApi for NeonControlPlaneClient {
Ok(cached.map(|()| node))
}
Err(err) => match err {
WakeComputeError::ControlPlane(ControlPlaneError::Message(err)) => {
let Some(status) = &err.status else {
return Err(WakeComputeError::ControlPlane(ControlPlaneError::Message(
err,
)));
};
WakeComputeError::ControlPlane(ControlPlaneError::Message(ref msg)) => {
let retry_info = msg.status.as_ref().and_then(|s| s.details.retry_info);
let reason = status
.details
.error_info
.map_or(Reason::Unknown, |x| x.reason);
// if we can retry this error, do not cache it.
if reason.can_retry() {
return Err(WakeComputeError::ControlPlane(ControlPlaneError::Message(
err,
)));
// If we can retry this error, do not cache it,
// unless we were given a retry delay.
if msg.could_retry() && retry_info.is_none() {
return Err(err);
}
// at this point, we should only have quota errors.
debug!(
key = &*key,
"created a cache entry for the wake compute error"
);
self.caches.node_info.insert_ttl(
key,
Err(err.clone()),
Duration::from_secs(30),
);
let ttl = retry_info.map_or(Duration::from_secs(30), |r| {
Duration::from_millis(r.retry_delay_ms)
});
Err(WakeComputeError::ControlPlane(ControlPlaneError::Message(
err,
)))
self.caches.node_info.insert_ttl(key, Err(msg.clone()), ttl);
Err(err)
}
err => return Err(err),
err => Err(err),
},
}
}
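In the hunk above, a retryable wake_compute error is only cached when the control plane supplied an explicit retry delay, and that delay then doubles as the cache TTL; non-retryable errors fall back to a 30-second TTL. A minimal sketch of that TTL choice, with illustrative names:

use std::time::Duration;

/// Pick a cache TTL for a wake_compute error, or `None` to skip caching entirely.
/// Retryable errors without an advertised retry delay are not cached, since the next
/// attempt may already succeed; otherwise the error is cached either for the advertised
/// delay or for a 30-second default.
fn error_cache_ttl(could_retry: bool, retry_delay_ms: Option<u64>) -> Option<Duration> {
    if could_retry && retry_delay_ms.is_none() {
        return None;
    }
    Some(retry_delay_ms.map_or(Duration::from_secs(30), Duration::from_millis))
}

fn main() {
    assert_eq!(error_cache_ttl(true, None), None);
    assert_eq!(error_cache_ttl(true, Some(1500)), Some(Duration::from_millis(1500)));
    assert_eq!(error_cache_ttl(false, None), Some(Duration::from_secs(30)));
}

On the read side, the cached error's retry_delay_ms is reduced by the time the entry has already spent in the cache (the saturating_sub over created_at.elapsed()), so a client is never told to wait longer than the control plane originally requested.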

View File

@@ -43,28 +43,35 @@ impl UserFacingError for ControlPlaneError {
}
impl ReportableError for ControlPlaneError {
fn get_error_kind(&self) -> crate::error::ErrorKind {
fn get_error_kind(&self) -> ErrorKind {
match self {
ControlPlaneError::Message(e) => match e.get_reason() {
Reason::RoleProtected => ErrorKind::User,
Reason::ResourceNotFound => ErrorKind::User,
Reason::ProjectNotFound => ErrorKind::User,
Reason::EndpointNotFound => ErrorKind::User,
Reason::BranchNotFound => ErrorKind::User,
Reason::RoleProtected
| Reason::ResourceNotFound
| Reason::ProjectNotFound
| Reason::EndpointNotFound
| Reason::EndpointDisabled
| Reason::BranchNotFound
| Reason::InvalidEphemeralEndpointOptions => ErrorKind::User,
Reason::RateLimitExceeded => ErrorKind::ServiceRateLimit,
Reason::NonDefaultBranchComputeTimeExceeded => ErrorKind::Quota,
Reason::ActiveTimeQuotaExceeded => ErrorKind::Quota,
Reason::ComputeTimeQuotaExceeded => ErrorKind::Quota,
Reason::WrittenDataQuotaExceeded => ErrorKind::Quota,
Reason::DataTransferQuotaExceeded => ErrorKind::Quota,
Reason::LogicalSizeQuotaExceeded => ErrorKind::Quota,
Reason::ConcurrencyLimitReached => ErrorKind::ControlPlane,
Reason::LockAlreadyTaken => ErrorKind::ControlPlane,
Reason::RunningOperations => ErrorKind::ControlPlane,
Reason::ActiveEndpointsLimitExceeded => ErrorKind::ControlPlane,
Reason::Unknown => ErrorKind::ControlPlane,
Reason::NonDefaultBranchComputeTimeExceeded
| Reason::ActiveTimeQuotaExceeded
| Reason::ComputeTimeQuotaExceeded
| Reason::WrittenDataQuotaExceeded
| Reason::DataTransferQuotaExceeded
| Reason::LogicalSizeQuotaExceeded
| Reason::ActiveEndpointsLimitExceeded => ErrorKind::Quota,
Reason::ConcurrencyLimitReached
| Reason::LockAlreadyTaken
| Reason::RunningOperations
| Reason::EndpointIdle
| Reason::ProjectUnderMaintenance
| Reason::Unknown => ErrorKind::ControlPlane,
},
ControlPlaneError::Transport(_) => crate::error::ErrorKind::ControlPlane,
ControlPlaneError::Transport(_) => ErrorKind::ControlPlane,
}
}
}
@@ -120,10 +127,10 @@ impl UserFacingError for GetAuthInfoError {
}
impl ReportableError for GetAuthInfoError {
fn get_error_kind(&self) -> crate::error::ErrorKind {
fn get_error_kind(&self) -> ErrorKind {
match self {
Self::BadSecret => crate::error::ErrorKind::ControlPlane,
Self::ApiError(_) => crate::error::ErrorKind::ControlPlane,
Self::BadSecret => ErrorKind::ControlPlane,
Self::ApiError(_) => ErrorKind::ControlPlane,
}
}
}

View File

@@ -126,10 +126,16 @@ pub(crate) enum Reason {
/// or that the subject doesn't have enough permissions to access the requested endpoint.
#[serde(rename = "ENDPOINT_NOT_FOUND")]
EndpointNotFound,
/// EndpointDisabled indicates that the endpoint has been disabled and does not accept connections.
#[serde(rename = "ENDPOINT_DISABLED")]
EndpointDisabled,
/// BranchNotFound indicates that the branch wasn't found, usually due to the provided ID not being correct,
/// or that the subject doesn't have enough permissions to access the requested branch.
#[serde(rename = "BRANCH_NOT_FOUND")]
BranchNotFound,
/// InvalidEphemeralEndpointOptions indicates that the specified LSN or timestamp is invalid.
#[serde(rename = "INVALID_EPHEMERAL_OPTIONS")]
InvalidEphemeralEndpointOptions,
/// RateLimitExceeded indicates that the rate limit for the operation has been exceeded.
#[serde(rename = "RATE_LIMIT_EXCEEDED")]
RateLimitExceeded,
@@ -152,6 +158,9 @@ pub(crate) enum Reason {
/// LogicalSizeQuotaExceeded indicates that the logical size quota was exceeded.
#[serde(rename = "LOGICAL_SIZE_QUOTA_EXCEEDED")]
LogicalSizeQuotaExceeded,
/// ActiveEndpointsLimitExceeded indicates that the limit of concurrently active endpoints was exceeded.
#[serde(rename = "ACTIVE_ENDPOINTS_LIMIT_EXCEEDED")]
ActiveEndpointsLimitExceeded,
/// RunningOperations indicates that the project already has some running operations
/// and scheduling of new ones is prohibited.
#[serde(rename = "RUNNING_OPERATIONS")]
@@ -162,9 +171,13 @@ pub(crate) enum Reason {
/// LockAlreadyTaken indicates that we attempted to take a lock that was already taken.
#[serde(rename = "LOCK_ALREADY_TAKEN")]
LockAlreadyTaken,
/// ActiveEndpointsLimitExceeded indicates that the limit of concurrently active endpoints was exceeded.
#[serde(rename = "ACTIVE_ENDPOINTS_LIMIT_EXCEEDED")]
ActiveEndpointsLimitExceeded,
/// EndpointIdle indicates that the endpoint cannot become active, because it's idle.
#[serde(rename = "ENDPOINT_IDLE")]
EndpointIdle,
/// ProjectUnderMaintenance indicates that the project is currently undergoing maintenance,
/// and thus cannot accept connections.
#[serde(rename = "PROJECT_UNDER_MAINTENANCE")]
ProjectUnderMaintenance,
#[default]
#[serde(other)]
Unknown,
@@ -184,13 +197,15 @@ impl Reason {
pub(crate) fn can_retry(self) -> bool {
match self {
// do not retry role protected errors
// not a transitive error
// not a transient error
Reason::RoleProtected => false,
// on retry, it will still not be found
// on retry, it will still not be found or valid
Reason::ResourceNotFound
| Reason::ProjectNotFound
| Reason::EndpointNotFound
| Reason::BranchNotFound => false,
| Reason::EndpointDisabled
| Reason::BranchNotFound
| Reason::InvalidEphemeralEndpointOptions => false,
// we were asked to go away
Reason::RateLimitExceeded
| Reason::NonDefaultBranchComputeTimeExceeded
@@ -200,11 +215,13 @@ impl Reason {
| Reason::DataTransferQuotaExceeded
| Reason::LogicalSizeQuotaExceeded
| Reason::ActiveEndpointsLimitExceeded => false,
// transitive error. control plane is currently busy
// transient error. control plane is currently busy
// but might be ready soon
Reason::RunningOperations
| Reason::ConcurrencyLimitReached
| Reason::LockAlreadyTaken => true,
| Reason::LockAlreadyTaken
| Reason::EndpointIdle
| Reason::ProjectUnderMaintenance => true,
// unknown error. better not retry it.
Reason::Unknown => false,
}
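The new variants rely on the same fallback pattern as the rest of `Reason`: unrecognized reason codes from the control plane deserialize to `Unknown` instead of failing the whole response. A small sketch of that pattern, assuming `serde` (with derive) and `serde_json` as dependencies:

use serde::Deserialize;

#[derive(Debug, Deserialize, PartialEq)]
enum Reason {
    #[serde(rename = "ENDPOINT_DISABLED")]
    EndpointDisabled,
    #[serde(rename = "PROJECT_UNDER_MAINTENANCE")]
    ProjectUnderMaintenance,
    // Any string that matches no known variant falls back to Unknown.
    #[serde(other)]
    Unknown,
}

fn main() {
    let r: Reason = serde_json::from_str("\"ENDPOINT_DISABLED\"").unwrap();
    assert_eq!(r, Reason::EndpointDisabled);
    let r: Reason = serde_json::from_str("\"SOME_FUTURE_REASON\"").unwrap();
    assert_eq!(r, Reason::Unknown);
}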

View File

@@ -1,12 +1,10 @@
use std::cell::RefCell;
use std::collections::HashMap;
use std::sync::Arc;
use std::sync::atomic::{AtomicU32, Ordering};
use std::{env, io};
use chrono::{DateTime, Utc};
use opentelemetry::trace::TraceContextExt;
use serde::ser::{SerializeMap, Serializer};
use tracing::subscriber::Interest;
use tracing::{Event, Metadata, Span, Subscriber, callsite, span};
use tracing_opentelemetry::OpenTelemetrySpanExt;
@@ -16,7 +14,9 @@ use tracing_subscriber::fmt::time::SystemTime;
use tracing_subscriber::fmt::{FormatEvent, FormatFields};
use tracing_subscriber::layer::{Context, Layer};
use tracing_subscriber::prelude::*;
use tracing_subscriber::registry::{LookupSpan, SpanRef};
use tracing_subscriber::registry::LookupSpan;
use crate::metrics::Metrics;
/// Initialize logging and OpenTelemetry tracing and exporter.
///
@@ -210,6 +210,9 @@ struct JsonLoggingLayer<C: Clock, W: MakeWriter> {
/// tracks which fields of each **event** are duplicates
skipped_field_indices: CallsiteMap<SkippedFieldIndices>,
/// maps callsite names to a counter used to disambiguate duplicate names.
callsite_name_ids: papaya::HashMap<&'static str, u32, ahash::RandomState>,
span_info: CallsiteMap<CallsiteSpanInfo>,
/// Fields we want to keep track of in a separate json object.
@@ -222,6 +225,7 @@ impl<C: Clock, W: MakeWriter> JsonLoggingLayer<C, W> {
clock,
skipped_field_indices: CallsiteMap::default(),
span_info: CallsiteMap::default(),
callsite_name_ids: papaya::HashMap::default(),
writer,
extract_fields,
}
@@ -232,7 +236,7 @@ impl<C: Clock, W: MakeWriter> JsonLoggingLayer<C, W> {
self.span_info
.pin()
.get_or_insert_with(metadata.callsite(), || {
CallsiteSpanInfo::new(metadata, self.extract_fields)
CallsiteSpanInfo::new(&self.callsite_name_ids, metadata, self.extract_fields)
})
.clone()
}
@@ -249,7 +253,7 @@ where
// early, before OTel machinery, and add as event extension.
let now = self.clock.now();
let res: io::Result<()> = EVENT_FORMATTER.with(|f| {
EVENT_FORMATTER.with(|f| {
let mut borrow = f.try_borrow_mut();
let formatter = match borrow.as_deref_mut() {
Ok(formatter) => formatter,
@@ -259,31 +263,19 @@ where
Err(_) => &mut EventFormatter::new(),
};
formatter.reset();
formatter.format(
now,
event,
&ctx,
&self.skipped_field_indices,
self.extract_fields,
)?;
self.writer.make_writer().write_all(formatter.buffer())
});
);
// In case logging fails we generate a simpler JSON object.
if let Err(err) = res
&& let Ok(mut line) = serde_json::to_vec(&serde_json::json!( {
"timestamp": now.to_rfc3339_opts(chrono::SecondsFormat::Micros, true),
"level": "ERROR",
"message": format_args!("cannot log event: {err:?}"),
"fields": {
"event": format_args!("{event:?}"),
},
}))
{
line.push(b'\n');
self.writer.make_writer().write_all(&line).ok();
}
let mut writer = self.writer.make_writer();
if writer.write_all(formatter.buffer()).is_err() {
Metrics::get().proxy.logging_errors_count.inc();
}
});
}
/// Registers a SpanFields instance as span extension.
@@ -356,10 +348,11 @@ struct CallsiteSpanInfo {
}
impl CallsiteSpanInfo {
fn new(metadata: &'static Metadata<'static>, extract_fields: &[&'static str]) -> Self {
// Start at 1 to reserve 0 for default.
static COUNTER: AtomicU32 = AtomicU32::new(1);
fn new(
callsite_name_ids: &papaya::HashMap<&'static str, u32, ahash::RandomState>,
metadata: &'static Metadata<'static>,
extract_fields: &[&'static str],
) -> Self {
let names: Vec<&'static str> = metadata.fields().iter().map(|f| f.name()).collect();
// get all the indices of span fields we want to focus
@@ -372,8 +365,18 @@ impl CallsiteSpanInfo {
// normalized_name is unique for each callsite, but it is not
// unified across separate proxy instances.
// todo: can we do better here?
let cid = COUNTER.fetch_add(1, Ordering::Relaxed);
let normalized_name = format!("{}#{cid}", metadata.name()).into();
let cid = *callsite_name_ids
.pin()
.update_or_insert(metadata.name(), |&cid| cid + 1, 0);
// we hope that most span names are unique, in which case this will always be 0
let normalized_name = if cid == 0 {
metadata.name().into()
} else {
// if the span name is not unique, add the numeric ID to span name to distinguish it.
// sadly this is non-deterministic across restarts, but we should fix it by disambiguating re-used span names instead.
format!("{}#{cid}", metadata.name()).into()
};
Self {
extract,
@@ -382,9 +385,24 @@ impl CallsiteSpanInfo {
}
}
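// Illustrative sketch of the per-name disambiguation above, using a plain HashMap in
// place of the concurrent papaya map: the first callsite to use a span name keeps the
// plain name, and later callsites that reuse the same name get "#1", "#2", and so on.
mod normalized_name_sketch {
    use std::collections::HashMap;

    pub fn normalized_name(
        counters: &mut HashMap<&'static str, u32>,
        name: &'static str,
    ) -> String {
        // First use of a name inserts 0; every reuse bumps the counter.
        let id = *counters.entry(name).and_modify(|c| *c += 1).or_insert(0);
        if id == 0 {
            name.to_string()
        } else {
            format!("{name}#{id}")
        }
    }

    #[test]
    fn disambiguates_reused_names() {
        let mut counters = HashMap::new();
        assert_eq!(normalized_name(&mut counters, "some_span"), "some_span");
        assert_eq!(normalized_name(&mut counters, "some_other_span"), "some_other_span");
        assert_eq!(normalized_name(&mut counters, "some_span"), "some_span#1");
    }
}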
#[derive(Clone)]
struct RawValue(Box<[u8]>);
impl RawValue {
fn new(v: impl json::ValueEncoder) -> Self {
Self(json::value_to_vec!(|val| v.encode(val)).into_boxed_slice())
}
}
impl json::ValueEncoder for &RawValue {
fn encode(self, v: json::ValueSer<'_>) {
v.write_raw_json(&self.0);
}
}
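// Illustrative sketch of the pre-encoding idea behind RawValue: a recorded field value
// is serialized to raw JSON bytes once, then spliced verbatim into every log line that
// needs it. This stand-in uses serde_json::value::RawValue (behind its "raw_value"
// feature) instead of the proxy's internal `json` helpers.
mod raw_value_sketch {
    pub fn encode_once<T: serde::Serialize>(v: &T) -> Box<serde_json::value::RawValue> {
        let s = serde_json::to_string(v).expect("value must be serializable");
        serde_json::value::RawValue::from_string(s).expect("output is valid JSON")
    }

    #[test]
    fn splices_without_reserializing() {
        let raw = encode_once(&42u64);
        let mut line = String::from("{\"x\":");
        line.push_str(raw.get()); // splice the pre-encoded bytes directly
        line.push('}');
        assert_eq!(line, r#"{"x":42}"#);
    }
}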
/// Stores span field values recorded during the span's lifetime.
struct SpanFields {
values: [serde_json::Value; MAX_TRACING_FIELDS],
values: [Option<RawValue>; MAX_TRACING_FIELDS],
/// cached span info so we can avoid extra hashmap lookups in the hot path.
span_info: CallsiteSpanInfo,
@@ -394,7 +412,7 @@ impl SpanFields {
fn new(span_info: CallsiteSpanInfo) -> Self {
Self {
span_info,
values: [const { serde_json::Value::Null }; MAX_TRACING_FIELDS],
values: [const { None }; MAX_TRACING_FIELDS],
}
}
}
@@ -402,55 +420,55 @@ impl SpanFields {
impl tracing::field::Visit for SpanFields {
#[inline]
fn record_f64(&mut self, field: &tracing::field::Field, value: f64) {
self.values[field.index()] = serde_json::Value::from(value);
self.values[field.index()] = Some(RawValue::new(value));
}
#[inline]
fn record_i64(&mut self, field: &tracing::field::Field, value: i64) {
self.values[field.index()] = serde_json::Value::from(value);
self.values[field.index()] = Some(RawValue::new(value));
}
#[inline]
fn record_u64(&mut self, field: &tracing::field::Field, value: u64) {
self.values[field.index()] = serde_json::Value::from(value);
self.values[field.index()] = Some(RawValue::new(value));
}
#[inline]
fn record_i128(&mut self, field: &tracing::field::Field, value: i128) {
if let Ok(value) = i64::try_from(value) {
self.values[field.index()] = serde_json::Value::from(value);
self.values[field.index()] = Some(RawValue::new(value));
} else {
self.values[field.index()] = serde_json::Value::from(format!("{value}"));
self.values[field.index()] = Some(RawValue::new(format_args!("{value}")));
}
}
#[inline]
fn record_u128(&mut self, field: &tracing::field::Field, value: u128) {
if let Ok(value) = u64::try_from(value) {
self.values[field.index()] = serde_json::Value::from(value);
self.values[field.index()] = Some(RawValue::new(value));
} else {
self.values[field.index()] = serde_json::Value::from(format!("{value}"));
self.values[field.index()] = Some(RawValue::new(format_args!("{value}")));
}
}
#[inline]
fn record_bool(&mut self, field: &tracing::field::Field, value: bool) {
self.values[field.index()] = serde_json::Value::from(value);
self.values[field.index()] = Some(RawValue::new(value));
}
#[inline]
fn record_bytes(&mut self, field: &tracing::field::Field, value: &[u8]) {
self.values[field.index()] = serde_json::Value::from(value);
self.values[field.index()] = Some(RawValue::new(value));
}
#[inline]
fn record_str(&mut self, field: &tracing::field::Field, value: &str) {
self.values[field.index()] = serde_json::Value::from(value);
self.values[field.index()] = Some(RawValue::new(value));
}
#[inline]
fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) {
self.values[field.index()] = serde_json::Value::from(format!("{value:?}"));
self.values[field.index()] = Some(RawValue::new(format_args!("{value:?}")));
}
#[inline]
@@ -459,7 +477,7 @@ impl tracing::field::Visit for SpanFields {
field: &tracing::field::Field,
value: &(dyn std::error::Error + 'static),
) {
self.values[field.index()] = serde_json::Value::from(format!("{value}"));
self.values[field.index()] = Some(RawValue::new(format_args!("{value}")));
}
}
@@ -508,11 +526,6 @@ impl EventFormatter {
&self.logline_buffer
}
#[inline]
fn reset(&mut self) {
self.logline_buffer.clear();
}
fn format<S>(
&mut self,
now: DateTime<Utc>,
@@ -520,8 +533,7 @@ impl EventFormatter {
ctx: &Context<'_, S>,
skipped_field_indices: &CallsiteMap<SkippedFieldIndices>,
extract_fields: &'static [&'static str],
) -> io::Result<()>
where
) where
S: Subscriber + for<'a> LookupSpan<'a>,
{
let timestamp = now.to_rfc3339_opts(chrono::SecondsFormat::Micros, true);
@@ -536,78 +548,99 @@ impl EventFormatter {
.copied()
.unwrap_or_default();
let mut serialize = || {
let mut serializer = serde_json::Serializer::new(&mut self.logline_buffer);
let mut serializer = serializer.serialize_map(None)?;
self.logline_buffer.clear();
let serializer = json::ValueSer::new(&mut self.logline_buffer);
json::value_as_object!(|serializer| {
// Timestamp comes first, so raw lines can be sorted by timestamp.
serializer.serialize_entry("timestamp", &timestamp)?;
serializer.entry("timestamp", &*timestamp);
// Level next.
serializer.serialize_entry("level", &meta.level().as_str())?;
serializer.entry("level", meta.level().as_str());
// Message next.
serializer.serialize_key("message")?;
let mut message_extractor =
MessageFieldExtractor::new(serializer, skipped_field_indices);
MessageFieldExtractor::new(serializer.key("message"), skipped_field_indices);
event.record(&mut message_extractor);
let mut serializer = message_extractor.into_serializer()?;
message_extractor.finish();
// Direct message fields.
let mut fields_present = FieldsPresent(false, skipped_field_indices);
event.record(&mut fields_present);
if fields_present.0 {
serializer.serialize_entry(
"fields",
&SerializableEventFields(event, skipped_field_indices),
)?;
{
let mut message_skipper = MessageFieldSkipper::new(
serializer.key("fields").object(),
skipped_field_indices,
);
event.record(&mut message_skipper);
// rollback if no fields are present.
if message_skipper.present {
message_skipper.serializer.finish();
}
}
let spans = SerializableSpans {
// collect all spans from parent to root.
spans: ctx
let mut extracted = ExtractedSpanFields::new(extract_fields);
let spans = serializer.key("spans");
json::value_as_object!(|spans| {
let parent_spans = ctx
.event_span(event)
.map_or(vec![], |parent| parent.scope().collect()),
extracted: ExtractedSpanFields::new(extract_fields),
};
serializer.serialize_entry("spans", &spans)?;
.map_or(vec![], |parent| parent.scope().collect());
for span in parent_spans.iter().rev() {
let ext = span.extensions();
// all spans should have this extension.
let Some(fields) = ext.get() else { continue };
extracted.layer_span(fields);
let SpanFields { values, span_info } = fields;
let span_fields = spans.key(&*span_info.normalized_name);
json::value_as_object!(|span_fields| {
for (field, value) in std::iter::zip(span.metadata().fields(), values) {
if let Some(value) = value {
span_fields.entry(field.name(), value);
}
}
});
}
});
// TODO: thread-local cache?
let pid = std::process::id();
// Skip adding pid 1 to reduce noise for services running in containers.
if pid != 1 {
serializer.serialize_entry("process_id", &pid)?;
serializer.entry("process_id", pid);
}
THREAD_ID.with(|tid| serializer.serialize_entry("thread_id", tid))?;
THREAD_ID.with(|tid| serializer.entry("thread_id", tid));
// TODO: tls cache? name could change
if let Some(thread_name) = std::thread::current().name()
&& !thread_name.is_empty()
&& thread_name != "tokio-runtime-worker"
{
serializer.serialize_entry("thread_name", thread_name)?;
serializer.entry("thread_name", thread_name);
}
if let Some(task_id) = tokio::task::try_id() {
serializer.serialize_entry("task_id", &format_args!("{task_id}"))?;
serializer.entry("task_id", format_args!("{task_id}"));
}
serializer.serialize_entry("target", meta.target())?;
serializer.entry("target", meta.target());
// Skip adding module if it's the same as target.
if let Some(module) = meta.module_path()
&& module != meta.target()
{
serializer.serialize_entry("module", module)?;
serializer.entry("module", module);
}
if let Some(file) = meta.file() {
if let Some(line) = meta.line() {
serializer.serialize_entry("src", &format_args!("{file}:{line}"))?;
serializer.entry("src", format_args!("{file}:{line}"));
} else {
serializer.serialize_entry("src", file)?;
serializer.entry("src", file);
}
}
@@ -616,124 +649,104 @@ impl EventFormatter {
let otel_spanref = otel_context.span();
let span_context = otel_spanref.span_context();
if span_context.is_valid() {
serializer.serialize_entry(
"trace_id",
&format_args!("{}", span_context.trace_id()),
)?;
serializer.entry("trace_id", format_args!("{}", span_context.trace_id()));
}
}
if spans.extracted.has_values() {
if extracted.has_values() {
// TODO: add fields from event, too?
serializer.serialize_entry("extract", &spans.extracted)?;
let extract = serializer.key("extract");
json::value_as_object!(|extract| {
for (key, value) in std::iter::zip(extracted.names, extracted.values) {
if let Some(value) = value {
extract.entry(*key, &value);
}
}
});
}
});
serializer.end()
};
serialize().map_err(io::Error::other)?;
self.logline_buffer.push(b'\n');
Ok(())
}
}
/// Extracts the message field that's mixed will other fields.
struct MessageFieldExtractor<S: serde::ser::SerializeMap> {
serializer: S,
struct MessageFieldExtractor<'buf> {
serializer: Option<json::ValueSer<'buf>>,
skipped_field_indices: SkippedFieldIndices,
state: Option<Result<(), S::Error>>,
}
impl<S: serde::ser::SerializeMap> MessageFieldExtractor<S> {
impl<'buf> MessageFieldExtractor<'buf> {
#[inline]
fn new(serializer: S, skipped_field_indices: SkippedFieldIndices) -> Self {
fn new(serializer: json::ValueSer<'buf>, skipped_field_indices: SkippedFieldIndices) -> Self {
Self {
serializer,
serializer: Some(serializer),
skipped_field_indices,
state: None,
}
}
#[inline]
fn into_serializer(mut self) -> Result<S, S::Error> {
match self.state {
Some(Ok(())) => {}
Some(Err(err)) => return Err(err),
None => self.serializer.serialize_value("")?,
fn finish(self) {
if let Some(ser) = self.serializer {
ser.value("");
}
Ok(self.serializer)
}
#[inline]
fn accept_field(&self, field: &tracing::field::Field) -> bool {
self.state.is_none()
&& field.name() == MESSAGE_FIELD
fn record_field(&mut self, field: &tracing::field::Field, v: impl json::ValueEncoder) {
if field.name() == MESSAGE_FIELD
&& !self.skipped_field_indices.contains(field.index())
&& let Some(ser) = self.serializer.take()
{
ser.value(v);
}
}
}
impl<S: serde::ser::SerializeMap> tracing::field::Visit for MessageFieldExtractor<S> {
impl tracing::field::Visit for MessageFieldExtractor<'_> {
#[inline]
fn record_f64(&mut self, field: &tracing::field::Field, value: f64) {
if self.accept_field(field) {
self.state = Some(self.serializer.serialize_value(&value));
}
self.record_field(field, value);
}
#[inline]
fn record_i64(&mut self, field: &tracing::field::Field, value: i64) {
if self.accept_field(field) {
self.state = Some(self.serializer.serialize_value(&value));
}
self.record_field(field, value);
}
#[inline]
fn record_u64(&mut self, field: &tracing::field::Field, value: u64) {
if self.accept_field(field) {
self.state = Some(self.serializer.serialize_value(&value));
}
self.record_field(field, value);
}
#[inline]
fn record_i128(&mut self, field: &tracing::field::Field, value: i128) {
if self.accept_field(field) {
self.state = Some(self.serializer.serialize_value(&value));
}
self.record_field(field, value);
}
#[inline]
fn record_u128(&mut self, field: &tracing::field::Field, value: u128) {
if self.accept_field(field) {
self.state = Some(self.serializer.serialize_value(&value));
}
self.record_field(field, value);
}
#[inline]
fn record_bool(&mut self, field: &tracing::field::Field, value: bool) {
if self.accept_field(field) {
self.state = Some(self.serializer.serialize_value(&value));
}
self.record_field(field, value);
}
#[inline]
fn record_bytes(&mut self, field: &tracing::field::Field, value: &[u8]) {
if self.accept_field(field) {
self.state = Some(self.serializer.serialize_value(&format_args!("{value:x?}")));
}
self.record_field(field, format_args!("{value:x?}"));
}
#[inline]
fn record_str(&mut self, field: &tracing::field::Field, value: &str) {
if self.accept_field(field) {
self.state = Some(self.serializer.serialize_value(&value));
}
self.record_field(field, value);
}
#[inline]
fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) {
if self.accept_field(field) {
self.state = Some(self.serializer.serialize_value(&format_args!("{value:?}")));
}
self.record_field(field, format_args!("{value:?}"));
}
#[inline]
@@ -742,147 +755,83 @@ impl<S: serde::ser::SerializeMap> tracing::field::Visit for MessageFieldExtracto
field: &tracing::field::Field,
value: &(dyn std::error::Error + 'static),
) {
if self.accept_field(field) {
self.state = Some(self.serializer.serialize_value(&format_args!("{value}")));
}
}
}
/// Checks if there's any fields and field values present. If not, the JSON subobject
/// can be skipped.
// This is entirely optional and only cosmetic, though maybe helps a
// bit during log parsing in dashboards when there's no field with empty object.
struct FieldsPresent(pub bool, SkippedFieldIndices);
// Even though some methods have an overhead (error, bytes) it is assumed the
// compiler won't include this since we ignore the value entirely.
impl tracing::field::Visit for FieldsPresent {
#[inline]
fn record_debug(&mut self, field: &tracing::field::Field, _: &dyn std::fmt::Debug) {
if !self.1.contains(field.index())
&& field.name() != MESSAGE_FIELD
&& !field.name().starts_with("log.")
{
self.0 |= true;
}
}
}
/// Serializes the fields directly supplied with a log event.
struct SerializableEventFields<'a, 'event>(&'a tracing::Event<'event>, SkippedFieldIndices);
impl serde::ser::Serialize for SerializableEventFields<'_, '_> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
use serde::ser::SerializeMap;
let serializer = serializer.serialize_map(None)?;
let mut message_skipper = MessageFieldSkipper::new(serializer, self.1);
self.0.record(&mut message_skipper);
let serializer = message_skipper.into_serializer()?;
serializer.end()
self.record_field(field, format_args!("{value}"));
}
}
/// A tracing field visitor that skips the message field.
struct MessageFieldSkipper<S: serde::ser::SerializeMap> {
serializer: S,
struct MessageFieldSkipper<'buf> {
serializer: json::ObjectSer<'buf>,
skipped_field_indices: SkippedFieldIndices,
state: Result<(), S::Error>,
present: bool,
}
impl<S: serde::ser::SerializeMap> MessageFieldSkipper<S> {
impl<'buf> MessageFieldSkipper<'buf> {
#[inline]
fn new(serializer: S, skipped_field_indices: SkippedFieldIndices) -> Self {
fn new(serializer: json::ObjectSer<'buf>, skipped_field_indices: SkippedFieldIndices) -> Self {
Self {
serializer,
skipped_field_indices,
state: Ok(()),
present: false,
}
}
#[inline]
fn accept_field(&self, field: &tracing::field::Field) -> bool {
self.state.is_ok()
&& field.name() != MESSAGE_FIELD
fn record_field(&mut self, field: &tracing::field::Field, v: impl json::ValueEncoder) {
if field.name() != MESSAGE_FIELD
&& !field.name().starts_with("log.")
&& !self.skipped_field_indices.contains(field.index())
}
#[inline]
fn into_serializer(self) -> Result<S, S::Error> {
self.state?;
Ok(self.serializer)
{
self.serializer.entry(field.name(), v);
self.present |= true;
}
}
}
impl<S: serde::ser::SerializeMap> tracing::field::Visit for MessageFieldSkipper<S> {
impl tracing::field::Visit for MessageFieldSkipper<'_> {
#[inline]
fn record_f64(&mut self, field: &tracing::field::Field, value: f64) {
if self.accept_field(field) {
self.state = self.serializer.serialize_entry(field.name(), &value);
}
self.record_field(field, value);
}
#[inline]
fn record_i64(&mut self, field: &tracing::field::Field, value: i64) {
if self.accept_field(field) {
self.state = self.serializer.serialize_entry(field.name(), &value);
}
self.record_field(field, value);
}
#[inline]
fn record_u64(&mut self, field: &tracing::field::Field, value: u64) {
if self.accept_field(field) {
self.state = self.serializer.serialize_entry(field.name(), &value);
}
self.record_field(field, value);
}
#[inline]
fn record_i128(&mut self, field: &tracing::field::Field, value: i128) {
if self.accept_field(field) {
self.state = self.serializer.serialize_entry(field.name(), &value);
}
self.record_field(field, value);
}
#[inline]
fn record_u128(&mut self, field: &tracing::field::Field, value: u128) {
if self.accept_field(field) {
self.state = self.serializer.serialize_entry(field.name(), &value);
}
self.record_field(field, value);
}
#[inline]
fn record_bool(&mut self, field: &tracing::field::Field, value: bool) {
if self.accept_field(field) {
self.state = self.serializer.serialize_entry(field.name(), &value);
}
self.record_field(field, value);
}
#[inline]
fn record_bytes(&mut self, field: &tracing::field::Field, value: &[u8]) {
if self.accept_field(field) {
self.state = self
.serializer
.serialize_entry(field.name(), &format_args!("{value:x?}"));
}
self.record_field(field, format_args!("{value:x?}"));
}
#[inline]
fn record_str(&mut self, field: &tracing::field::Field, value: &str) {
if self.accept_field(field) {
self.state = self.serializer.serialize_entry(field.name(), &value);
}
self.record_field(field, value);
}
#[inline]
fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) {
if self.accept_field(field) {
self.state = self
.serializer
.serialize_entry(field.name(), &format_args!("{value:?}"));
}
self.record_field(field, format_args!("{value:?}"));
}
#[inline]
@@ -891,131 +840,40 @@ impl<S: serde::ser::SerializeMap> tracing::field::Visit for MessageFieldSkipper<
field: &tracing::field::Field,
value: &(dyn std::error::Error + 'static),
) {
if self.accept_field(field) {
self.state = self.serializer.serialize_value(&format_args!("{value}"));
}
}
}
/// Serializes the span stack from root to leaf (parent of event) as object
/// with the span names as keys. To prevent collision we append a numeric value
/// to the name. Also, collects any span fields we're interested in. Last one
/// wins.
struct SerializableSpans<'ctx, S>
where
S: for<'lookup> LookupSpan<'lookup>,
{
spans: Vec<SpanRef<'ctx, S>>,
extracted: ExtractedSpanFields,
}
impl<S> serde::ser::Serialize for SerializableSpans<'_, S>
where
S: for<'lookup> LookupSpan<'lookup>,
{
fn serialize<Ser>(&self, serializer: Ser) -> Result<Ser::Ok, Ser::Error>
where
Ser: serde::ser::Serializer,
{
let mut serializer = serializer.serialize_map(None)?;
for span in self.spans.iter().rev() {
let ext = span.extensions();
// all spans should have this extension.
let Some(fields) = ext.get() else { continue };
self.extracted.layer_span(fields);
let SpanFields { values, span_info } = fields;
serializer.serialize_entry(
&*span_info.normalized_name,
&SerializableSpanFields {
fields: span.metadata().fields(),
values,
},
)?;
}
serializer.end()
}
}
/// Serializes the span fields as object.
struct SerializableSpanFields<'span> {
fields: &'span tracing::field::FieldSet,
values: &'span [serde_json::Value; MAX_TRACING_FIELDS],
}
impl serde::ser::Serialize for SerializableSpanFields<'_> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::ser::Serializer,
{
let mut serializer = serializer.serialize_map(None)?;
for (field, value) in std::iter::zip(self.fields, self.values) {
if value.is_null() {
continue;
}
serializer.serialize_entry(field.name(), value)?;
}
serializer.end()
}
}
struct ExtractedSpanFields {
names: &'static [&'static str],
values: RefCell<Vec<serde_json::Value>>,
values: Vec<Option<RawValue>>,
}
impl ExtractedSpanFields {
fn new(names: &'static [&'static str]) -> Self {
ExtractedSpanFields {
names,
values: RefCell::new(vec![serde_json::Value::Null; names.len()]),
values: vec![None; names.len()],
}
}
fn layer_span(&self, fields: &SpanFields) {
let mut v = self.values.borrow_mut();
fn layer_span(&mut self, fields: &SpanFields) {
let SpanFields { values, span_info } = fields;
// extract the fields
for (i, &j) in span_info.extract.iter().enumerate() {
let Some(value) = values.get(j) else { continue };
let Some(Some(value)) = values.get(j) else {
continue;
};
if !value.is_null() {
// TODO: replace clone with reference, if possible.
v[i] = value.clone();
}
// TODO: replace clone with reference, if possible.
self.values[i] = Some(value.clone());
}
}
#[inline]
    fn has_values(&self) -> bool {
        self.values.borrow().iter().any(|v| !v.is_null())
        self.values.iter().any(|v| v.is_some())
    }
}
impl serde::ser::Serialize for ExtractedSpanFields {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::ser::Serializer,
{
let mut serializer = serializer.serialize_map(None)?;
let values = self.values.borrow();
for (key, value) in std::iter::zip(self.names, &*values) {
if value.is_null() {
continue;
}
serializer.serialize_entry(key, value)?;
}
serializer.end()
}
}
@@ -1070,6 +928,7 @@ mod tests {
clock: clock.clone(),
skipped_field_indices: papaya::HashMap::default(),
span_info: papaya::HashMap::default(),
callsite_name_ids: papaya::HashMap::default(),
writer: buffer.clone(),
extract_fields: &["x"],
};
@@ -1078,14 +937,16 @@ mod tests {
tracing::subscriber::with_default(registry, || {
info_span!("some_span", x = 24).in_scope(|| {
info_span!("some_span", x = 40, x = 41, x = 42).in_scope(|| {
tracing::error!(
a = 1,
a = 2,
a = 3,
message = "explicit message field",
"implicit message field"
);
info_span!("some_other_span", y = 30).in_scope(|| {
info_span!("some_span", x = 40, x = 41, x = 42).in_scope(|| {
tracing::error!(
a = 1,
a = 2,
a = 3,
message = "explicit message field",
"implicit message field"
);
});
});
});
});
@@ -1104,12 +965,15 @@ mod tests {
"a": 3,
},
"spans": {
"some_span#1":{
"some_span":{
"x": 24,
},
"some_span#2": {
"some_other_span": {
"y": 30,
},
"some_span#1": {
"x": 42,
}
},
},
"extract": {
"x": 42,

View File

@@ -112,6 +112,9 @@ pub struct ProxyMetrics {
/// Number of bytes sent/received between all clients and backends.
pub io_bytes: CounterVec<StaticLabelSet<Direction>>,
/// Number of IO errors while logging.
pub logging_errors_count: Counter,
/// Number of errors by a given classification.
pub errors_total: CounterVec<StaticLabelSet<crate::error::ErrorKind>>,
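
As a sketch of how these two new metrics might be bumped at their call sites: the `Metrics::get().proxy` accessor appears elsewhere in this diff, but the `inc` method names below are assumptions about the metrics crate, not taken from the hunk.

// Hypothetical call sites; counter method names assumed.
fn on_logging_io_error() {
    Metrics::get().proxy.logging_errors_count.inc();
}

fn on_classified_error(kind: crate::error::ErrorKind) {
    Metrics::get().proxy.errors_total.inc(kind);
}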

View File

@@ -110,7 +110,7 @@ where
debug!(error = ?err, COULD_NOT_CONNECT);
let node_info = if !node_info.cached() || !err.should_retry_wake_compute() {
// If we just recieved this from cplane and didn't get it from cache, we shouldn't retry.
// If we just received this from cplane and not from the cache, we shouldn't retry.
// Do not need to retrieve a new node_info, just return the old one.
if !should_retry(&err, num_retries, compute.retry) {
Metrics::get().proxy.retries_metric.observe(
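
The hunk above is cut off mid-call, but the guard it rewords is worth spelling out; a minimal restatement under assumed names (the helper itself is hypothetical, not part of the diff):

// Keep the existing node_info (and skip waking compute again) when either:
// - it did not come from the cache, i.e. cplane just handed it to us, or
// - the error tells us that waking compute again would not help.
fn keep_existing_node_info(came_from_cache: bool, err_should_retry_wake: bool) -> bool {
    !came_from_cache || !err_should_retry_wake
}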

View File

@@ -195,15 +195,18 @@ impl NeonOptions {
// proxy options:
/// `PARAMS_COMPAT` allows opting in to forwarding all startup parameters from client to compute.
pub const PARAMS_COMPAT: &str = "proxy_params_compat";
pub const PARAMS_COMPAT: &'static str = "proxy_params_compat";
// cplane options:
/// `LSN` allows provisioning an ephemeral compute with time-travel to the provided LSN.
const LSN: &str = "lsn";
const LSN: &'static str = "lsn";
/// `TIMESTAMP` allows provisioning an ephemeral compute with time-travel to the provided timestamp.
const TIMESTAMP: &'static str = "timestamp";
/// `ENDPOINT_TYPE` allows configuring an ephemeral compute to be read_only or read_write.
const ENDPOINT_TYPE: &str = "endpoint_type";
const ENDPOINT_TYPE: &'static str = "endpoint_type";
pub(crate) fn parse_params(params: &StartupMessageParams) -> Self {
params
@@ -228,6 +231,7 @@ impl NeonOptions {
// This is not a cplane option, we know it does not create ephemeral computes.
Self::PARAMS_COMPAT => false,
Self::LSN => true,
Self::TIMESTAMP => true,
Self::ENDPOINT_TYPE => true,
// err on the side of caution. any cplane options we don't know about
// might lead to ephemeral computes.
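
Putting the constants and the match arms above together, the classification amounts to the following sketch (the free-standing helper is hypothetical; in the diff this logic lives on NeonOptions):

// Does a given option force provisioning an ephemeral compute?
// Keys mirror the constants above; anything unknown is treated as ephemeral to stay safe.
fn option_creates_ephemeral_compute(key: &str) -> bool {
    match key {
        "proxy_params_compat" => false,                 // proxy-only, never reaches cplane
        "lsn" | "timestamp" | "endpoint_type" => true,  // time-travel / read-only computes
        _ => true,                                      // err on the side of caution
    }
}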

View File

@@ -265,10 +265,7 @@ async fn handle_messages<C: ProjectInfoCache + Send + Sync + 'static>(
return Ok(());
}
let mut conn = match try_connect(&redis).await {
Ok(conn) => {
handler.cache.increment_active_listeners().await;
conn
}
Ok(conn) => conn,
Err(e) => {
tracing::error!(
"failed to connect to redis: {e}, will try to reconnect in {RECONNECT_TIMEOUT:#?}"
@@ -287,11 +284,9 @@ async fn handle_messages<C: ProjectInfoCache + Send + Sync + 'static>(
}
}
if cancellation_token.is_cancelled() {
handler.cache.decrement_active_listeners().await;
return Ok(());
}
}
handler.cache.decrement_active_listeners().await;
}
}

View File

@@ -32,7 +32,7 @@ psutil = "^5.9.4"
types-psutil = "^5.9.5.12"
types-toml = "^0.10.8.6"
pytest-httpserver = "^1.0.8"
aiohttp = "3.10.11"
aiohttp = "3.12.14"
pytest-rerunfailures = "^15.0"
types-pytest-lazy-fixture = "^0.6.3.3"
pytest-split = "^0.8.1"

View File

@@ -58,6 +58,7 @@ metrics.workspace = true
pem.workspace = true
postgres_backend.workspace = true
postgres_ffi.workspace = true
postgres_ffi_types.workspace = true
postgres_versioninfo.workspace = true
pq_proto.workspace = true
remote_storage.workspace = true
@@ -71,6 +72,7 @@ http-utils.workspace = true
utils.workspace = true
wal_decoder.workspace = true
env_logger.workspace = true
nix.workspace = true
workspace_hack.workspace = true

Some files were not shown because too many files have changed in this diff.