mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-29 08:10:38 +00:00
Compare commits
28 Commits
min_prefet
...
problame/f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0f400096b1 | ||
|
|
9baacfa406 | ||
|
|
904a63dff5 | ||
|
|
67dbe63275 | ||
|
|
8662463b06 | ||
|
|
78ad89b4d5 | ||
|
|
9c63b7c39c | ||
|
|
69918a041c | ||
|
|
3306c5045b | ||
|
|
137328f304 | ||
|
|
19b0a79968 | ||
|
|
5d50c3c086 | ||
|
|
10955dfb52 | ||
|
|
a3e05b5abf | ||
|
|
2156d02658 | ||
|
|
9e923bcc9b | ||
|
|
3a3062d236 | ||
|
|
01cd326153 | ||
|
|
a1095efd85 | ||
|
|
75d4ccb05c | ||
|
|
415cdff336 | ||
|
|
27dc11f5cc | ||
|
|
ac5279e600 | ||
|
|
9aeee51bf3 | ||
|
|
fc3f55d236 | ||
|
|
8a04a62ecd | ||
|
|
4adc3bdd3a | ||
|
|
f29b9737cc |
@@ -27,4 +27,4 @@
|
||||
!storage_controller/
|
||||
!vendor/postgres-*/
|
||||
!workspace_hack/
|
||||
!build-tools/patches
|
||||
!build_tools/patches
|
||||
|
||||
1
.github/actionlint.yml
vendored
1
.github/actionlint.yml
vendored
@@ -31,7 +31,6 @@ config-variables:
|
||||
- NEON_PROD_AWS_ACCOUNT_ID
|
||||
- PGREGRESS_PG16_PROJECT_ID
|
||||
- PGREGRESS_PG17_PROJECT_ID
|
||||
- PREWARM_PGBENCH_SIZE
|
||||
- REMOTE_STORAGE_AZURE_CONTAINER
|
||||
- REMOTE_STORAGE_AZURE_REGION
|
||||
- SLACK_CICD_CHANNEL_ID
|
||||
|
||||
@@ -176,11 +176,7 @@ runs:
|
||||
fi
|
||||
|
||||
if [[ $BUILD_TYPE == "debug" && $RUNNER_ARCH == 'X64' ]]; then
|
||||
# We don't use code coverage for regression tests (the step is disabled),
|
||||
# so there's no need to collect it.
|
||||
# Ref https://github.com/neondatabase/neon/issues/4540
|
||||
# cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
|
||||
cov_prefix=()
|
||||
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
|
||||
else
|
||||
cov_prefix=()
|
||||
fi
|
||||
|
||||
@@ -150,7 +150,7 @@ jobs:
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v14
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
|
||||
|
||||
- name: Cache postgres v15 build
|
||||
id: cache_pg_15
|
||||
@@ -162,7 +162,7 @@ jobs:
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v15
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
|
||||
|
||||
- name: Cache postgres v16 build
|
||||
id: cache_pg_16
|
||||
@@ -174,7 +174,7 @@ jobs:
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v16
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
|
||||
|
||||
- name: Cache postgres v17 build
|
||||
id: cache_pg_17
|
||||
@@ -186,7 +186,7 @@ jobs:
|
||||
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
|
||||
use-fallback: false
|
||||
path: pg_install/v17
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
|
||||
|
||||
- name: Build all
|
||||
# Note: the Makefile picks up BUILD_TYPE and CARGO_PROFILE from the env variables
|
||||
|
||||
72
.github/workflows/benchmarking.yml
vendored
72
.github/workflows/benchmarking.yml
vendored
@@ -219,7 +219,6 @@ jobs:
|
||||
--ignore test_runner/performance/test_cumulative_statistics_persistence.py
|
||||
--ignore test_runner/performance/test_perf_many_relations.py
|
||||
--ignore test_runner/performance/test_perf_oltp_large_tenant.py
|
||||
--ignore test_runner/performance/test_lfc_prewarm.py
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
@@ -411,77 +410,6 @@ jobs:
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
prewarm-test:
|
||||
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
env:
|
||||
PGBENCH_SIZE: ${{ vars.PREWARM_PGBENCH_SIZE }}
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
DEFAULT_PG_VERSION: 17
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
BUILD_TYPE: remote
|
||||
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
|
||||
PLATFORM: "neon-staging"
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
|
||||
with:
|
||||
egress-policy: audit
|
||||
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Run prewarm benchmark
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
test_selection: performance/test_lfc_prewarm.py
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 5400
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
|
||||
- name: Create Allure report
|
||||
id: create-allure-report
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
store-test-results-into-db: true
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
||||
|
||||
generate-matrices:
|
||||
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
|
||||
# Create matrices for the benchmarking jobs, so we run benchmarks on rds only once a week (on Saturday)
|
||||
|
||||
@@ -72,7 +72,7 @@ jobs:
|
||||
ARCHS: ${{ inputs.archs || '["x64","arm64"]' }}
|
||||
DEBIANS: ${{ inputs.debians || '["bullseye","bookworm"]' }}
|
||||
IMAGE_TAG: |
|
||||
${{ hashFiles('build-tools/Dockerfile',
|
||||
${{ hashFiles('build-tools.Dockerfile',
|
||||
'.github/workflows/build-build-tools-image.yml') }}
|
||||
run: |
|
||||
echo "archs=${ARCHS}" | tee -a ${GITHUB_OUTPUT}
|
||||
@@ -144,7 +144,7 @@ jobs:
|
||||
|
||||
- uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v6.15.0
|
||||
with:
|
||||
file: build-tools/Dockerfile
|
||||
file: build-tools.Dockerfile
|
||||
context: .
|
||||
provenance: false
|
||||
push: true
|
||||
|
||||
8
.gitmodules
vendored
8
.gitmodules
vendored
@@ -1,16 +1,16 @@
|
||||
[submodule "vendor/postgres-v14"]
|
||||
path = vendor/postgres-v14
|
||||
url = ../postgres.git
|
||||
url = https://github.com/neondatabase/postgres.git
|
||||
branch = REL_14_STABLE_neon
|
||||
[submodule "vendor/postgres-v15"]
|
||||
path = vendor/postgres-v15
|
||||
url = ../postgres.git
|
||||
url = https://github.com/neondatabase/postgres.git
|
||||
branch = REL_15_STABLE_neon
|
||||
[submodule "vendor/postgres-v16"]
|
||||
path = vendor/postgres-v16
|
||||
url = ../postgres.git
|
||||
url = https://github.com/neondatabase/postgres.git
|
||||
branch = REL_16_STABLE_neon
|
||||
[submodule "vendor/postgres-v17"]
|
||||
path = vendor/postgres-v17
|
||||
url = ../postgres.git
|
||||
url = https://github.com/neondatabase/postgres.git
|
||||
branch = REL_17_STABLE_neon
|
||||
|
||||
35
Cargo.lock
generated
35
Cargo.lock
generated
@@ -1330,7 +1330,6 @@ dependencies = [
|
||||
"chrono",
|
||||
"clap",
|
||||
"compute_api",
|
||||
"fail",
|
||||
"flate2",
|
||||
"futures",
|
||||
"hostname-validator",
|
||||
@@ -1339,6 +1338,7 @@ dependencies = [
|
||||
"itertools 0.10.5",
|
||||
"jsonwebtoken",
|
||||
"metrics",
|
||||
"neon_failpoint",
|
||||
"nix 0.30.1",
|
||||
"notify",
|
||||
"num_cpus",
|
||||
@@ -2891,13 +2891,13 @@ dependencies = [
|
||||
"arc-swap",
|
||||
"bytes",
|
||||
"camino",
|
||||
"fail",
|
||||
"futures",
|
||||
"hyper 0.14.30",
|
||||
"itertools 0.10.5",
|
||||
"jemalloc_pprof",
|
||||
"jsonwebtoken",
|
||||
"metrics",
|
||||
"neon_failpoint",
|
||||
"once_cell",
|
||||
"pprof",
|
||||
"regex",
|
||||
@@ -3852,6 +3852,23 @@ dependencies = [
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "neon_failpoint"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"either",
|
||||
"once_cell",
|
||||
"parking_lot 0.12.1",
|
||||
"rand 0.8.5",
|
||||
"regex",
|
||||
"serde",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "never-say-never"
|
||||
version = "6.6.666"
|
||||
@@ -4296,7 +4313,6 @@ dependencies = [
|
||||
"pageserver_client",
|
||||
"pageserver_client_grpc",
|
||||
"pageserver_page_api",
|
||||
"pprof",
|
||||
"rand 0.8.5",
|
||||
"reqwest",
|
||||
"serde",
|
||||
@@ -4358,7 +4374,6 @@ dependencies = [
|
||||
"either",
|
||||
"enum-map",
|
||||
"enumset",
|
||||
"fail",
|
||||
"futures",
|
||||
"hashlink",
|
||||
"hex",
|
||||
@@ -4373,6 +4388,7 @@ dependencies = [
|
||||
"jsonwebtoken",
|
||||
"md5",
|
||||
"metrics",
|
||||
"neon_failpoint",
|
||||
"nix 0.30.1",
|
||||
"num-traits",
|
||||
"num_cpus",
|
||||
@@ -5290,7 +5306,6 @@ dependencies = [
|
||||
"async-trait",
|
||||
"atomic-take",
|
||||
"aws-config",
|
||||
"aws-credential-types",
|
||||
"aws-sdk-iam",
|
||||
"aws-sigv4",
|
||||
"base64 0.22.1",
|
||||
@@ -5330,7 +5345,6 @@ dependencies = [
|
||||
"itoa",
|
||||
"jose-jwa",
|
||||
"jose-jwk",
|
||||
"json",
|
||||
"lasso",
|
||||
"measured",
|
||||
"metrics",
|
||||
@@ -6194,7 +6208,6 @@ dependencies = [
|
||||
"criterion",
|
||||
"desim",
|
||||
"env_logger",
|
||||
"fail",
|
||||
"futures",
|
||||
"hex",
|
||||
"http 1.1.0",
|
||||
@@ -6204,6 +6217,7 @@ dependencies = [
|
||||
"itertools 0.10.5",
|
||||
"jsonwebtoken",
|
||||
"metrics",
|
||||
"neon_failpoint",
|
||||
"once_cell",
|
||||
"pageserver_api",
|
||||
"parking_lot 0.12.1",
|
||||
@@ -6890,7 +6904,6 @@ dependencies = [
|
||||
"diesel",
|
||||
"diesel-async",
|
||||
"diesel_migrations",
|
||||
"fail",
|
||||
"futures",
|
||||
"governor",
|
||||
"hex",
|
||||
@@ -6903,6 +6916,7 @@ dependencies = [
|
||||
"lasso",
|
||||
"measured",
|
||||
"metrics",
|
||||
"neon_failpoint",
|
||||
"once_cell",
|
||||
"pageserver_api",
|
||||
"pageserver_client",
|
||||
@@ -6994,7 +7008,6 @@ dependencies = [
|
||||
"pageserver_api",
|
||||
"pageserver_client",
|
||||
"reqwest",
|
||||
"safekeeper_api",
|
||||
"serde_json",
|
||||
"storage_controller_client",
|
||||
"tokio",
|
||||
@@ -7564,7 +7577,6 @@ dependencies = [
|
||||
"futures-core",
|
||||
"pin-project-lite",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -8169,7 +8181,7 @@ dependencies = [
|
||||
"const_format",
|
||||
"criterion",
|
||||
"diatomic-waker",
|
||||
"fail",
|
||||
"either",
|
||||
"futures",
|
||||
"git-version",
|
||||
"hex",
|
||||
@@ -8177,6 +8189,7 @@ dependencies = [
|
||||
"humantime",
|
||||
"jsonwebtoken",
|
||||
"metrics",
|
||||
"neon_failpoint",
|
||||
"nix 0.30.1",
|
||||
"once_cell",
|
||||
"pem",
|
||||
|
||||
@@ -21,6 +21,7 @@ members = [
|
||||
"workspace_hack",
|
||||
"libs/compute_api",
|
||||
"libs/http-utils",
|
||||
"libs/neon_failpoint",
|
||||
"libs/pageserver_api",
|
||||
"libs/postgres_ffi",
|
||||
"libs/postgres_ffi_types",
|
||||
@@ -97,7 +98,6 @@ diatomic-waker = { version = "0.2.3" }
|
||||
either = "1.8"
|
||||
enum-map = "2.4.2"
|
||||
enumset = "1.0.12"
|
||||
fail = "0.5.0"
|
||||
fallible-iterator = "0.2"
|
||||
framed-websockets = { version = "0.1.0", git = "https://github.com/neondatabase/framed-websockets" }
|
||||
futures = "0.3"
|
||||
@@ -201,7 +201,7 @@ tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.g
|
||||
tokio-io-timeout = "1.2.0"
|
||||
tokio-postgres-rustls = "0.12.0"
|
||||
tokio-rustls = { version = "0.26.0", default-features = false, features = ["tls12", "ring"]}
|
||||
tokio-stream = { version = "0.1", features = ["sync"] }
|
||||
tokio-stream = "0.1"
|
||||
tokio-tar = "0.3"
|
||||
tokio-util = { version = "0.7.10", features = ["io", "io-util", "rt"] }
|
||||
toml = "0.8"
|
||||
@@ -258,6 +258,7 @@ desim = { version = "0.1", path = "./libs/desim" }
|
||||
endpoint_storage = { version = "0.0.1", path = "./endpoint_storage/" }
|
||||
http-utils = { version = "0.1", path = "./libs/http-utils/" }
|
||||
metrics = { version = "0.1", path = "./libs/metrics/" }
|
||||
neon_failpoint = { version = "0.1", path = "./libs/neon_failpoint/" }
|
||||
neon-shmem = { version = "0.1", path = "./libs/neon-shmem/" }
|
||||
pageserver = { path = "./pageserver" }
|
||||
pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
|
||||
|
||||
@@ -35,7 +35,7 @@ RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
|
||||
echo -e "retry_connrefused=on\ntimeout=15\ntries=5\nretry-on-host-error=on\n" > /root/.wgetrc && \
|
||||
echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc
|
||||
|
||||
COPY build-tools/patches/pgcopydbv017.patch /pgcopydbv017.patch
|
||||
COPY build_tools/patches/pgcopydbv017.patch /pgcopydbv017.patch
|
||||
|
||||
RUN if [ "${DEBIAN_VERSION}" = "bookworm" ]; then \
|
||||
set -e && \
|
||||
@@ -9,7 +9,7 @@
|
||||
#
|
||||
# build-tools: This contains Rust compiler toolchain and other tools needed at compile
|
||||
# time. This is also used for the storage builds. This image is defined in
|
||||
# build-tools/Dockerfile.
|
||||
# build-tools.Dockerfile.
|
||||
#
|
||||
# build-deps: Contains C compiler, other build tools, and compile-time dependencies
|
||||
# needed to compile PostgreSQL and most extensions. (Some extensions need
|
||||
@@ -115,7 +115,7 @@ ARG EXTENSIONS=all
|
||||
FROM $BASE_IMAGE_SHA AS build-deps
|
||||
ARG DEBIAN_VERSION
|
||||
|
||||
# Keep in sync with build-tools/Dockerfile
|
||||
# Keep in sync with build-tools.Dockerfile
|
||||
ENV PROTOC_VERSION=25.1
|
||||
|
||||
# Use strict mode for bash to catch errors early
|
||||
@@ -1790,7 +1790,7 @@ RUN set -e \
|
||||
#########################################################################################
|
||||
FROM build-deps AS exporters
|
||||
ARG TARGETARCH
|
||||
# Keep sql_exporter version same as in build-tools/Dockerfile and
|
||||
# Keep sql_exporter version same as in build-tools.Dockerfile and
|
||||
# test_runner/regress/test_compute_metrics.py
|
||||
# See comment on the top of the file regading `echo`, `-e` and `\n`
|
||||
RUN if [ "$TARGETARCH" = "amd64" ]; then\
|
||||
|
||||
@@ -7,7 +7,7 @@ license.workspace = true
|
||||
[features]
|
||||
default = []
|
||||
# Enables test specific features.
|
||||
testing = ["fail/failpoints"]
|
||||
testing = ["neon_failpoint/testing"]
|
||||
|
||||
[dependencies]
|
||||
async-compression.workspace = true
|
||||
@@ -23,7 +23,7 @@ camino.workspace = true
|
||||
chrono.workspace = true
|
||||
cfg-if.workspace = true
|
||||
clap.workspace = true
|
||||
fail.workspace = true
|
||||
neon_failpoint.workspace = true
|
||||
flate2.workspace = true
|
||||
futures.workspace = true
|
||||
http.workspace = true
|
||||
|
||||
@@ -154,7 +154,7 @@ impl Cli {
|
||||
fn main() -> Result<()> {
|
||||
let cli = Cli::parse();
|
||||
|
||||
let scenario = failpoint_support::init();
|
||||
failpoint_support::init().unwrap();
|
||||
|
||||
// For historical reasons, the main thread that processes the config and launches postgres
|
||||
// is synchronous, but we always have this tokio runtime available and we "enter" it so
|
||||
@@ -201,8 +201,6 @@ fn main() -> Result<()> {
|
||||
|
||||
let exit_code = compute_node.run()?;
|
||||
|
||||
scenario.teardown();
|
||||
|
||||
deinit_and_exit(exit_code);
|
||||
}
|
||||
|
||||
|
||||
@@ -1040,8 +1040,6 @@ impl ComputeNode {
|
||||
PageserverProtocol::Grpc => self.try_get_basebackup_grpc(spec, lsn)?,
|
||||
};
|
||||
|
||||
self.fix_zenith_signal_neon_signal()?;
|
||||
|
||||
let mut state = self.state.lock().unwrap();
|
||||
state.metrics.pageserver_connect_micros =
|
||||
connected.duration_since(started).as_micros() as u64;
|
||||
@@ -1051,27 +1049,6 @@ impl ComputeNode {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Move the Zenith signal file to Neon signal file location.
|
||||
/// This makes Compute compatible with older PageServers that don't yet
|
||||
/// know about the Zenith->Neon rename.
|
||||
fn fix_zenith_signal_neon_signal(&self) -> Result<()> {
|
||||
let datadir = Path::new(&self.params.pgdata);
|
||||
|
||||
let neonsig = datadir.join("neon.signal");
|
||||
|
||||
if neonsig.is_file() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let zenithsig = datadir.join("zenith.signal");
|
||||
|
||||
if zenithsig.is_file() {
|
||||
fs::copy(zenithsig, neonsig)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Fetches a basebackup via gRPC. The connstring must use grpc://. Returns the timestamp when
|
||||
/// the connection was established, and the (compressed) size of the basebackup.
|
||||
fn try_get_basebackup_grpc(&self, spec: &ParsedSpec, lsn: Lsn) -> Result<(Instant, usize)> {
|
||||
@@ -2487,7 +2464,7 @@ pub async fn installed_extensions(conf: tokio_postgres::Config) -> Result<()> {
|
||||
serde_json::to_string(&extensions).expect("failed to serialize extensions list")
|
||||
);
|
||||
}
|
||||
Err(err) => error!("could not get installed extensions: {err}"),
|
||||
Err(err) => error!("could not get installed extensions: {err:?}"),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use http::StatusCode;
|
||||
use neon_failpoint::{configure_failpoint, configure_failpoint_with_context};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use tracing::info;
|
||||
use utils::failpoint_support::apply_failpoint;
|
||||
|
||||
pub type ConfigureFailpointsRequest = Vec<FailpointConfig>;
|
||||
|
||||
@@ -11,10 +12,16 @@ pub type ConfigureFailpointsRequest = Vec<FailpointConfig>;
|
||||
pub struct FailpointConfig {
|
||||
/// Name of the fail point
|
||||
pub name: String,
|
||||
/// List of actions to take, using the format described in `fail::cfg`
|
||||
/// List of actions to take, using the format described in neon_failpoint
|
||||
///
|
||||
/// We also support `actions = "exit"` to cause the fail point to immediately exit.
|
||||
/// We support actions: "pause", "sleep(N)", "return", "return(value)", "exit", "off", "panic(message)"
|
||||
/// Plus probability-based actions: "N%return(value)", "N%M*return(value)", "N%action", "N%M*action"
|
||||
pub actions: String,
|
||||
/// Optional context matching rules for conditional failpoints
|
||||
/// Each key-value pair specifies a context key and a regex pattern to match against
|
||||
/// All context matchers must match for the failpoint to trigger
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub context_matchers: Option<HashMap<String, String>>,
|
||||
}
|
||||
|
||||
use crate::http::JsonResponse;
|
||||
@@ -24,7 +31,7 @@ use crate::http::extract::Json;
|
||||
pub(in crate::http) async fn configure_failpoints(
|
||||
failpoints: Json<ConfigureFailpointsRequest>,
|
||||
) -> Response {
|
||||
if !fail::has_failpoints() {
|
||||
if !neon_failpoint::has_failpoints() {
|
||||
return JsonResponse::error(
|
||||
StatusCode::PRECONDITION_FAILED,
|
||||
"Cannot manage failpoints because neon was compiled without failpoints support",
|
||||
@@ -32,16 +39,21 @@ pub(in crate::http) async fn configure_failpoints(
|
||||
}
|
||||
|
||||
for fp in &*failpoints {
|
||||
info!("cfg failpoint: {} {}", fp.name, fp.actions);
|
||||
info!(
|
||||
"cfg failpoint: {} {} (context: {:?})",
|
||||
fp.name, fp.actions, fp.context_matchers
|
||||
);
|
||||
|
||||
// We recognize one extra "action" that's not natively recognized
|
||||
// by the failpoints crate: exit, to immediately kill the process
|
||||
let cfg_result = apply_failpoint(&fp.name, &fp.actions);
|
||||
let cfg_result = if let Some(context_matchers) = fp.context_matchers.clone() {
|
||||
configure_failpoint_with_context(&fp.name, &fp.actions, context_matchers)
|
||||
} else {
|
||||
configure_failpoint(&fp.name, &fp.actions)
|
||||
};
|
||||
|
||||
if let Err(e) = cfg_result {
|
||||
return JsonResponse::error(
|
||||
StatusCode::BAD_REQUEST,
|
||||
format!("failed to configure failpoints: {e}"),
|
||||
format!("failed to configure failpoint '{}': {e}", fp.name),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ use std::collections::HashMap;
|
||||
|
||||
use anyhow::Result;
|
||||
use compute_api::responses::{InstalledExtension, InstalledExtensions};
|
||||
use tokio_postgres::error::Error as PostgresError;
|
||||
use tokio_postgres::{Client, Config, NoTls};
|
||||
|
||||
use crate::metrics::INSTALLED_EXTENSIONS;
|
||||
@@ -11,7 +10,7 @@ use crate::metrics::INSTALLED_EXTENSIONS;
|
||||
/// and to make database listing query here more explicit.
|
||||
///
|
||||
/// Limit the number of databases to 500 to avoid excessive load.
|
||||
async fn list_dbs(client: &mut Client) -> Result<Vec<String>, PostgresError> {
|
||||
async fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
|
||||
// `pg_database.datconnlimit = -2` means that the database is in the
|
||||
// invalid state
|
||||
let databases = client
|
||||
@@ -38,9 +37,7 @@ async fn list_dbs(client: &mut Client) -> Result<Vec<String>, PostgresError> {
|
||||
/// Same extension can be installed in multiple databases with different versions,
|
||||
/// so we report a separate metric (number of databases where it is installed)
|
||||
/// for each extension version.
|
||||
pub async fn get_installed_extensions(
|
||||
mut conf: Config,
|
||||
) -> Result<InstalledExtensions, PostgresError> {
|
||||
pub async fn get_installed_extensions(mut conf: Config) -> Result<InstalledExtensions> {
|
||||
conf.application_name("compute_ctl:get_installed_extensions");
|
||||
let databases: Vec<String> = {
|
||||
let (mut client, connection) = conf.connect(NoTls).await?;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use anyhow::{Context, Result};
|
||||
use fail::fail_point;
|
||||
use neon_failpoint::fail_point;
|
||||
use tokio_postgres::{Client, Transaction};
|
||||
use tracing::{error, info};
|
||||
|
||||
@@ -40,13 +40,14 @@ impl<'m> MigrationRunner<'m> {
|
||||
// middle of applying a series of migrations fails in an expected
|
||||
// manner
|
||||
if cfg!(feature = "testing") {
|
||||
let fail = (|| {
|
||||
fail_point!("compute-migration", |fail_migration_id| {
|
||||
let fail = async {
|
||||
fail_point!("compute-migration", |fail_migration_id: Option<String>| {
|
||||
migration_id == fail_migration_id.unwrap().parse::<i64>().unwrap()
|
||||
});
|
||||
|
||||
false
|
||||
})();
|
||||
}
|
||||
.await;
|
||||
|
||||
if fail {
|
||||
return Err(anyhow::anyhow!(format!(
|
||||
|
||||
@@ -36,7 +36,7 @@ impl StorageBroker {
|
||||
pub async fn start(&self, retry_timeout: &Duration) -> anyhow::Result<()> {
|
||||
let broker = &self.env.broker;
|
||||
|
||||
println!("Starting neon broker at {}", broker.client_url());
|
||||
print!("Starting neon broker at {}", broker.client_url());
|
||||
|
||||
let mut args = Vec::new();
|
||||
|
||||
|
||||
@@ -32,8 +32,7 @@
|
||||
//! config.json - passed to `compute_ctl`
|
||||
//! pgdata/
|
||||
//! postgresql.conf - copy of postgresql.conf created by `compute_ctl`
|
||||
//! neon.signal
|
||||
//! zenith.signal - copy of neon.signal, for backward compatibility
|
||||
//! zenith.signal
|
||||
//! <other PostgreSQL files>
|
||||
//! ```
|
||||
//!
|
||||
|
||||
@@ -217,9 +217,6 @@ pub struct NeonStorageControllerConf {
|
||||
pub posthog_config: Option<PostHogConfig>,
|
||||
|
||||
pub kick_secondary_downloads: Option<bool>,
|
||||
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub shard_split_request_timeout: Option<Duration>,
|
||||
}
|
||||
|
||||
impl NeonStorageControllerConf {
|
||||
@@ -253,7 +250,6 @@ impl Default for NeonStorageControllerConf {
|
||||
timeline_safekeeper_count: None,
|
||||
posthog_config: None,
|
||||
kick_secondary_downloads: None,
|
||||
shard_split_request_timeout: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -303,7 +303,7 @@ impl PageServerNode {
|
||||
async fn start_node(&self, retry_timeout: &Duration) -> anyhow::Result<()> {
|
||||
// TODO: using a thread here because start_process() is not async but we need to call check_status()
|
||||
let datadir = self.repo_path();
|
||||
println!(
|
||||
print!(
|
||||
"Starting pageserver node {} at '{}' in {:?}, retrying for {:?}",
|
||||
self.conf.id,
|
||||
self.pg_connection_config.raw_address(),
|
||||
|
||||
@@ -127,7 +127,7 @@ impl SafekeeperNode {
|
||||
extra_opts: &[String],
|
||||
retry_timeout: &Duration,
|
||||
) -> anyhow::Result<()> {
|
||||
println!(
|
||||
print!(
|
||||
"Starting safekeeper at '{}' in '{}', retrying for {:?}",
|
||||
self.pg_connection_config.raw_address(),
|
||||
self.datadir_path().display(),
|
||||
|
||||
@@ -648,13 +648,6 @@ impl StorageController {
|
||||
args.push(format!("--timeline-safekeeper-count={sk_cnt}"));
|
||||
}
|
||||
|
||||
if let Some(duration) = self.config.shard_split_request_timeout {
|
||||
args.push(format!(
|
||||
"--shard-split-request-timeout={}",
|
||||
humantime::Duration::from(duration)
|
||||
));
|
||||
}
|
||||
|
||||
let mut envs = vec![
|
||||
("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
|
||||
("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
|
||||
@@ -667,7 +660,7 @@ impl StorageController {
|
||||
));
|
||||
}
|
||||
|
||||
println!("Starting storage controller at {scheme}://{host}:{listen_port}");
|
||||
println!("Starting storage controller");
|
||||
|
||||
background_process::start_process(
|
||||
COMMAND,
|
||||
|
||||
@@ -14,7 +14,6 @@ humantime.workspace = true
|
||||
pageserver_api.workspace = true
|
||||
pageserver_client.workspace = true
|
||||
reqwest.workspace = true
|
||||
safekeeper_api.workspace=true
|
||||
serde_json = { workspace = true, features = ["raw_value"] }
|
||||
storage_controller_client.workspace = true
|
||||
tokio.workspace = true
|
||||
|
||||
@@ -11,7 +11,7 @@ use pageserver_api::controller_api::{
|
||||
PlacementPolicy, SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest,
|
||||
ShardSchedulingPolicy, ShardsPreferredAzsRequest, ShardsPreferredAzsResponse,
|
||||
SkSchedulingPolicy, TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
|
||||
TenantShardMigrateRequest, TenantShardMigrateResponse, TimelineSafekeeperMigrateRequest,
|
||||
TenantShardMigrateRequest, TenantShardMigrateResponse,
|
||||
};
|
||||
use pageserver_api::models::{
|
||||
EvictionPolicy, EvictionPolicyLayerAccessThreshold, ShardParameters, TenantConfig,
|
||||
@@ -21,7 +21,6 @@ use pageserver_api::models::{
|
||||
use pageserver_api::shard::{ShardStripeSize, TenantShardId};
|
||||
use pageserver_client::mgmt_api::{self};
|
||||
use reqwest::{Certificate, Method, StatusCode, Url};
|
||||
use safekeeper_api::models::TimelineLocateResponse;
|
||||
use storage_controller_client::control_api::Client;
|
||||
use utils::id::{NodeId, TenantId, TimelineId};
|
||||
|
||||
@@ -280,23 +279,6 @@ enum Command {
|
||||
#[arg(long)]
|
||||
concurrency: Option<usize>,
|
||||
},
|
||||
/// Locate safekeepers for a timeline from the storcon DB.
|
||||
TimelineLocate {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
#[arg(long)]
|
||||
timeline_id: TimelineId,
|
||||
},
|
||||
/// Migrate a timeline to a new set of safekeepers
|
||||
TimelineSafekeeperMigrate {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
#[arg(long)]
|
||||
timeline_id: TimelineId,
|
||||
/// Example: --new-sk-set 1,2,3
|
||||
#[arg(long, required = true, value_delimiter = ',')]
|
||||
new_sk_set: Vec<NodeId>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
@@ -476,7 +458,6 @@ async fn main() -> anyhow::Result<()> {
|
||||
listen_http_port,
|
||||
listen_https_port,
|
||||
availability_zone_id: AvailabilityZone(availability_zone_id),
|
||||
node_ip_addr: None,
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
@@ -1343,7 +1324,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
concurrency,
|
||||
} => {
|
||||
let mut path = format!(
|
||||
"v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers",
|
||||
"/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers",
|
||||
);
|
||||
|
||||
if let Some(c) = concurrency {
|
||||
@@ -1354,41 +1335,6 @@ async fn main() -> anyhow::Result<()> {
|
||||
.dispatch::<(), ()>(Method::POST, path, None)
|
||||
.await?;
|
||||
}
|
||||
Command::TimelineLocate {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
} => {
|
||||
let path = format!("debug/v1/tenant/{tenant_id}/timeline/{timeline_id}/locate");
|
||||
|
||||
let resp = storcon_client
|
||||
.dispatch::<(), TimelineLocateResponse>(Method::GET, path, None)
|
||||
.await?;
|
||||
|
||||
let sk_set = resp.sk_set.iter().map(|id| id.0 as i64).collect::<Vec<_>>();
|
||||
let new_sk_set = resp
|
||||
.new_sk_set
|
||||
.as_ref()
|
||||
.map(|ids| ids.iter().map(|id| id.0 as i64).collect::<Vec<_>>());
|
||||
|
||||
println!("generation = {}", resp.generation);
|
||||
println!("sk_set = {sk_set:?}");
|
||||
println!("new_sk_set = {new_sk_set:?}");
|
||||
}
|
||||
Command::TimelineSafekeeperMigrate {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
new_sk_set,
|
||||
} => {
|
||||
let path = format!("v1/tenant/{tenant_id}/timeline/{timeline_id}/safekeeper_migrate");
|
||||
|
||||
storcon_client
|
||||
.dispatch::<_, ()>(
|
||||
Method::POST,
|
||||
path,
|
||||
Some(TimelineSafekeeperMigrateRequest { new_sk_set }),
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -129,10 +129,9 @@ segment to bootstrap the WAL writing, but it doesn't contain the checkpoint reco
|
||||
changes in xlog.c, to allow starting the compute node without reading the last checkpoint record
|
||||
from WAL.
|
||||
|
||||
This includes code to read the `neon.signal` (also `zenith.signal`) file, which tells the startup
|
||||
code the LSN to start at. When the `neon.signal` file is present, the startup uses that LSN
|
||||
instead of the last checkpoint's LSN. The system is known to be consistent at that LSN, without
|
||||
any WAL redo.
|
||||
This includes code to read the `zenith.signal` file, which tells the startup code the LSN to start
|
||||
at. When the `zenith.signal` file is present, the startup uses that LSN instead of the last
|
||||
checkpoint's LSN. The system is known to be consistent at that LSN, without any WAL redo.
|
||||
|
||||
|
||||
### How to get rid of the patch
|
||||
|
||||
@@ -9,7 +9,7 @@ anyhow.workspace = true
|
||||
arc-swap.workspace = true
|
||||
bytes.workspace = true
|
||||
camino.workspace = true
|
||||
fail.workspace = true
|
||||
neon_failpoint.workspace = true
|
||||
futures.workspace = true
|
||||
hyper0.workspace = true
|
||||
itertools.workspace = true
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
use hyper::{Body, Request, Response, StatusCode};
|
||||
use neon_failpoint::{configure_failpoint, configure_failpoint_with_context, has_failpoints};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use utils::failpoint_support::apply_failpoint;
|
||||
|
||||
use crate::error::ApiError;
|
||||
use crate::json::{json_request, json_response};
|
||||
@@ -13,10 +14,16 @@ pub type ConfigureFailpointsRequest = Vec<FailpointConfig>;
|
||||
pub struct FailpointConfig {
|
||||
/// Name of the fail point
|
||||
pub name: String,
|
||||
/// List of actions to take, using the format described in `fail::cfg`
|
||||
/// List of actions to take, using the format described in neon_failpoint
|
||||
///
|
||||
/// We also support `actions = "exit"` to cause the fail point to immediately exit.
|
||||
/// We support actions: "pause", "sleep(N)", "return", "return(value)", "exit", "off", "panic(message)"
|
||||
/// Plus probability-based actions: "N%return(value)", "N%M*return(value)", "N%action", "N%M*action"
|
||||
pub actions: String,
|
||||
/// Optional context matching rules for conditional failpoints
|
||||
/// Each key-value pair specifies a context key and a regex pattern to match against
|
||||
/// All context matchers must match for the failpoint to trigger
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub context_matchers: Option<HashMap<String, String>>,
|
||||
}
|
||||
|
||||
/// Configure failpoints through http.
|
||||
@@ -24,7 +31,7 @@ pub async fn failpoints_handler(
|
||||
mut request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
if !fail::has_failpoints() {
|
||||
if !has_failpoints() {
|
||||
return Err(ApiError::BadRequest(anyhow::anyhow!(
|
||||
"Cannot manage failpoints because neon was compiled without failpoints support"
|
||||
)));
|
||||
@@ -32,15 +39,24 @@ pub async fn failpoints_handler(
|
||||
|
||||
let failpoints: ConfigureFailpointsRequest = json_request(&mut request).await?;
|
||||
for fp in failpoints {
|
||||
tracing::info!("cfg failpoint: {} {}", fp.name, fp.actions);
|
||||
tracing::info!(
|
||||
"cfg failpoint: {} {} (context: {:?})",
|
||||
fp.name,
|
||||
fp.actions,
|
||||
fp.context_matchers
|
||||
);
|
||||
|
||||
// We recognize one extra "action" that's not natively recognized
|
||||
// by the failpoints crate: exit, to immediately kill the process
|
||||
let cfg_result = apply_failpoint(&fp.name, &fp.actions);
|
||||
let cfg_result = if let Some(context_matchers) = fp.context_matchers {
|
||||
configure_failpoint_with_context(&fp.name, &fp.actions, context_matchers)
|
||||
} else {
|
||||
configure_failpoint(&fp.name, &fp.actions)
|
||||
};
|
||||
|
||||
if let Err(err_msg) = cfg_result {
|
||||
if let Err(err) = cfg_result {
|
||||
return Err(ApiError::BadRequest(anyhow::anyhow!(
|
||||
"Failed to configure failpoints: {err_msg}"
|
||||
"Failed to configure failpoint '{}': {}",
|
||||
fp.name,
|
||||
err
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
27
libs/neon_failpoint/Cargo.toml
Normal file
27
libs/neon_failpoint/Cargo.toml
Normal file
@@ -0,0 +1,27 @@
|
||||
[package]
|
||||
name = "neon_failpoint"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
tokio = { workspace = true, features = ["time", "sync", "rt-multi-thread"] }
|
||||
tokio-util = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
anyhow = { workspace = true }
|
||||
regex = { workspace = true }
|
||||
once_cell = { workspace = true }
|
||||
parking_lot = { workspace = true }
|
||||
rand = { workspace = true }
|
||||
either = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
tracing-subscriber = { workspace = true, features = ["fmt"] }
|
||||
|
||||
[features]
|
||||
default = []
|
||||
testing = []
|
||||
|
||||
[[example]]
|
||||
name = "context_demo"
|
||||
required-features = ["testing"]
|
||||
460
libs/neon_failpoint/README.md
Normal file
460
libs/neon_failpoint/README.md
Normal file
@@ -0,0 +1,460 @@
|
||||
# Neon Failpoint Library
|
||||
|
||||
A modern, async-first failpoint library for Neon, replacing the `fail` crate with enhanced functionality.
|
||||
|
||||
## Features
|
||||
|
||||
- **Async-first**: All failpoint operations are async and don't require `spawn_blocking`
|
||||
- **Context matching**: Failpoints can be configured to trigger only when specific context conditions are met
|
||||
- **Regex support**: Context values can be matched using regular expressions
|
||||
- **Cancellation support**: All operations support cancellation tokens
|
||||
- **Dynamic reconfiguration**: Paused and sleeping tasks automatically resume when failpoint configurations change
|
||||
- **Backward compatibility**: Drop-in replacement for existing `fail` crate usage
|
||||
|
||||
## Supported Actions
|
||||
|
||||
- `off` - Disable the failpoint
|
||||
- `pause` - Pause indefinitely until disabled, reconfigured, or cancelled
|
||||
- `sleep(N)` - Sleep for N milliseconds (can be interrupted by reconfiguration)
|
||||
- `return` - Return early (empty value)
|
||||
- `return(value)` - Return early with a specific value
|
||||
- `exit` - Exit the process immediately
|
||||
- `panic(message)` - Panic the process with a custom message
|
||||
- `N%return(value)` - Return with a specific value N% of the time (probability-based)
|
||||
- `N%M*return(value)` - Return with a specific value N% of the time, maximum M times
|
||||
- `N%action` - Execute any action N% of the time (probability-based)
|
||||
- `N%M*action` - Execute any action N% of the time, maximum M times
|
||||
|
||||
## Probability-Based Actions
|
||||
|
||||
The library supports probability-based failpoints that trigger only a percentage of the time:
|
||||
|
||||
```rust
|
||||
// 50% chance to return a value
|
||||
configure_failpoint("random_failure", "50%return(error)").unwrap();
|
||||
|
||||
// 10% chance to sleep, maximum 3 times
|
||||
configure_failpoint("occasional_delay", "10%3*sleep(1000)").unwrap();
|
||||
|
||||
// 25% chance to panic
|
||||
configure_failpoint("rare_panic", "25%panic(critical error)").unwrap();
|
||||
```
|
||||
|
||||
The probability system uses a counter to track how many times a probability-based action has been triggered, allowing for precise control over test scenarios.
|
||||
|
||||
## Dynamic Behavior
|
||||
|
||||
When a failpoint is reconfigured while tasks are waiting on it:
|
||||
|
||||
- **Paused tasks** will immediately resume and continue normal execution
|
||||
- **Sleeping tasks** will wake up early and continue normal execution
|
||||
- **Removed failpoints** will cause all waiting tasks to resume normally
|
||||
|
||||
The new configuration only applies to future hits of the failpoint, not to tasks that are already waiting. This allows for flexible testing scenarios where you can pause execution, inspect state, and then resume execution dynamically.
|
||||
|
||||
## Example: Dynamic Reconfiguration
|
||||
|
||||
```rust
|
||||
use neon_failpoint::{configure_failpoint, failpoint, FailpointResult};
|
||||
use tokio::time::Duration;
|
||||
|
||||
// Start a task that will hit a failpoint
|
||||
let task = tokio::spawn(async {
|
||||
println!("About to hit failpoint");
|
||||
match failpoint("test_pause", None).await {
|
||||
FailpointResult::Return(value) => println!("Returned: {}", value),
|
||||
FailpointResult::Continue => println!("Continued normally"),
|
||||
FailpointResult::Cancelled => println!("Cancelled"),
|
||||
}
|
||||
});
|
||||
|
||||
// Configure the failpoint to pause
|
||||
configure_failpoint("test_pause", "pause").unwrap();
|
||||
|
||||
// Let the task hit the failpoint and pause
|
||||
tokio::time::sleep(Duration::from_millis(10)).await;
|
||||
|
||||
// Change the failpoint configuration - this will wake up the paused task
|
||||
// The task will resume and continue normally (not apply the new config)
|
||||
configure_failpoint("test_pause", "return(not_applied)").unwrap();
|
||||
|
||||
// The task will complete with Continue, not Return
|
||||
let result = task.await.unwrap();
|
||||
```
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```rust
|
||||
use neon_failpoint::{configure_failpoint, failpoint, FailpointResult};
|
||||
|
||||
// Configure a failpoint
|
||||
configure_failpoint("my_failpoint", "return(42)").unwrap();
|
||||
|
||||
// Use the failpoint
|
||||
match failpoint("my_failpoint", None).await {
|
||||
FailpointResult::Return(value) => {
|
||||
println!("Failpoint returned: {}", value);
|
||||
return value.parse().unwrap_or_default();
|
||||
}
|
||||
FailpointResult::Continue => {
|
||||
// Continue normal execution
|
||||
}
|
||||
FailpointResult::Cancelled => {
|
||||
// Handle cancellation
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Context-Based Failpoint Configuration
|
||||
|
||||
Context allows you to create **conditional failpoints** that only trigger when specific runtime conditions are met. This is particularly useful for testing scenarios where you want to inject failures only for specific tenants, operations, or other contextual conditions.
|
||||
|
||||
### Configuring Context-Based Failpoints
|
||||
|
||||
Use `configure_failpoint_with_context()` to set up failpoints with context matching:
|
||||
|
||||
```rust
|
||||
use neon_failpoint::configure_failpoint_with_context;
|
||||
use std::collections::HashMap;
|
||||
|
||||
let mut context_matchers = HashMap::new();
|
||||
context_matchers.insert("tenant_id".to_string(), "test_.*".to_string());
|
||||
context_matchers.insert("operation".to_string(), "backup".to_string());
|
||||
|
||||
configure_failpoint_with_context(
|
||||
"backup_operation", // failpoint name
|
||||
"return(simulated_failure)", // action to take
|
||||
context_matchers // context matching rules
|
||||
).unwrap();
|
||||
```
|
||||
|
||||
### Context Matching Rules
|
||||
|
||||
The context matching system works as follows:
|
||||
|
||||
1. **Key-Value Matching**: Each entry in `context_matchers` specifies a key that must exist in the runtime context
|
||||
2. **Regex Support**: Values in `context_matchers` are treated as regular expressions first
|
||||
3. **Fallback to Exact Match**: If the regex compilation fails, it falls back to exact string matching
|
||||
4. **ALL Must Match**: All context matchers must match for the failpoint to trigger
|
||||
|
||||
### Runtime Context Usage
|
||||
|
||||
When code hits a failpoint, it provides context using a `HashMap<String, String>`:
|
||||
|
||||
```rust
|
||||
use neon_failpoint::{failpoint, FailpointResult};
|
||||
use std::collections::HashMap;
|
||||
|
||||
let mut context = HashMap::new();
|
||||
context.insert("tenant_id".to_string(), "test_123".to_string());
|
||||
context.insert("operation".to_string(), "backup".to_string());
|
||||
context.insert("user_id".to_string(), "user_456".to_string());
|
||||
|
||||
match failpoint("backup_operation", Some(&context)) {
|
||||
either::Either::Left(result) => {
|
||||
match result {
|
||||
FailpointResult::Return(value) => {
|
||||
// This will only trigger if ALL context matchers match
|
||||
println!("Backup failed: {}", value);
|
||||
}
|
||||
FailpointResult::Continue => {
|
||||
// Continue with normal backup operation
|
||||
}
|
||||
FailpointResult::Cancelled => {}
|
||||
}
|
||||
}
|
||||
either::Either::Right(future) => {
|
||||
match future.await {
|
||||
FailpointResult::Return(value) => {
|
||||
// This will only trigger if ALL context matchers match
|
||||
println!("Backup failed: {}", value);
|
||||
}
|
||||
FailpointResult::Continue => {
|
||||
// Continue with normal backup operation
|
||||
}
|
||||
FailpointResult::Cancelled => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Context Matching Examples
|
||||
|
||||
#### Regex Matching
|
||||
```rust
|
||||
// Configure to match test tenants only
|
||||
let mut matchers = HashMap::new();
|
||||
matchers.insert("tenant_id".to_string(), "test_.*".to_string());
|
||||
|
||||
configure_failpoint_with_context("test_failpoint", "pause", matchers).unwrap();
|
||||
|
||||
// This will match
|
||||
let mut context = HashMap::new();
|
||||
context.insert("tenant_id".to_string(), "test_123".to_string());
|
||||
// This will NOT match
|
||||
let mut context = HashMap::new();
|
||||
context.insert("tenant_id".to_string(), "prod_123".to_string());
|
||||
```
|
||||
|
||||
#### Multiple Conditions
|
||||
```rust
|
||||
// Must match BOTH tenant pattern AND operation
|
||||
let mut matchers = HashMap::new();
|
||||
matchers.insert("tenant_id".to_string(), "test_.*".to_string());
|
||||
matchers.insert("operation".to_string(), "backup".to_string());
|
||||
|
||||
configure_failpoint_with_context("backup_test", "return(failed)", matchers).unwrap();
|
||||
|
||||
// This will match (both conditions met)
|
||||
let mut context = HashMap::new();
|
||||
context.insert("tenant_id".to_string(), "test_123".to_string());
|
||||
context.insert("operation".to_string(), "backup".to_string());
|
||||
|
||||
// This will NOT match (missing operation)
|
||||
let mut context = HashMap::new();
|
||||
context.insert("tenant_id".to_string(), "test_123".to_string());
|
||||
context.insert("operation".to_string(), "restore".to_string());
|
||||
```
|
||||
|
||||
#### Exact String Matching
|
||||
```rust
|
||||
// If regex compilation fails, falls back to exact match
|
||||
let mut matchers = HashMap::new();
|
||||
matchers.insert("env".to_string(), "staging".to_string());
|
||||
|
||||
configure_failpoint_with_context("env_specific", "sleep(1000)", matchers).unwrap();
|
||||
|
||||
// This will match
|
||||
let mut context = HashMap::new();
|
||||
context.insert("env".to_string(), "staging".to_string());
|
||||
// This will NOT match
|
||||
let mut context = HashMap::new();
|
||||
context.insert("env".to_string(), "production".to_string());
|
||||
```
|
||||
|
||||
### Benefits of Context-Based Failpoints
|
||||
|
||||
1. **Selective Testing**: Only inject failures for specific tenants, environments, or operations
|
||||
2. **Production Safety**: Avoid accidentally triggering failpoints in production by using context filters
|
||||
3. **Complex Scenarios**: Test interactions between different components with targeted failures
|
||||
4. **Debugging**: Isolate issues to specific contexts without affecting the entire system
|
||||
|
||||
### Context vs. Non-Context Failpoints
|
||||
|
||||
- **Without context**: `configure_failpoint("name", "action")` - triggers for ALL hits
|
||||
- **With context**: `configure_failpoint_with_context("name", "action", matchers)` - triggers only when context matches
|
||||
|
||||
## Context-Specific Failpoints
|
||||
|
||||
```rust
|
||||
use neon_failpoint::{configure_failpoint_with_context, failpoint};
|
||||
use std::collections::HashMap;
|
||||
|
||||
// Configure a failpoint that only triggers for specific tenants
|
||||
let mut context_matchers = HashMap::new();
|
||||
context_matchers.insert("tenant_id".to_string(), "test_.*".to_string());
|
||||
context_matchers.insert("operation".to_string(), "backup".to_string());
|
||||
|
||||
configure_failpoint_with_context(
|
||||
"backup_operation",
|
||||
"return(simulated_failure)",
|
||||
context_matchers
|
||||
).unwrap();
|
||||
|
||||
// Use with context
|
||||
let mut context = HashMap::new();
|
||||
context.insert("tenant_id".to_string(), "test_123".to_string());
|
||||
context.insert("operation".to_string(), "backup".to_string());
|
||||
|
||||
match failpoint("backup_operation", Some(&context)) {
|
||||
either::Either::Left(result) => {
|
||||
match result {
|
||||
FailpointResult::Return(value) => {
|
||||
// This will trigger for tenant_id matching "test_.*"
|
||||
println!("Backup failed: {}", value);
|
||||
}
|
||||
FailpointResult::Continue => {
|
||||
// Continue with backup
|
||||
}
|
||||
FailpointResult::Cancelled => {}
|
||||
}
|
||||
}
|
||||
either::Either::Right(future) => {
|
||||
match future.await {
|
||||
FailpointResult::Return(value) => {
|
||||
// This will trigger for tenant_id matching "test_.*"
|
||||
println!("Backup failed: {}", value);
|
||||
}
|
||||
FailpointResult::Continue => {
|
||||
// Continue with backup
|
||||
}
|
||||
FailpointResult::Cancelled => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Macros
|
||||
|
||||
The library provides convenient macros for common patterns:
|
||||
|
||||
### `fail_point!` - Basic Failpoint Macro
|
||||
|
||||
The `fail_point!` macro has three variants:
|
||||
|
||||
1. **Simple failpoint** - `fail_point!(name)`
|
||||
- Just checks the failpoint and continues or returns early (no value)
|
||||
- Panics if the failpoint is configured with `return(value)` since no closure is provided
|
||||
|
||||
2. **Failpoint with return handler** - `fail_point!(name, closure)`
|
||||
- Provides a closure to handle return values from the failpoint
|
||||
- The closure receives `Option<String>` and should return the appropriate value
|
||||
|
||||
3. **Conditional failpoint** - `fail_point!(name, condition, closure)`
|
||||
- Only checks the failpoint if the condition is true
|
||||
- Provides a closure to handle return values (receives `&str`)
|
||||
|
||||
```rust
|
||||
use neon_failpoint::fail_point;
|
||||
|
||||
// Simple failpoint - just continue or return early
|
||||
fail_point!("my_failpoint");
|
||||
|
||||
// Failpoint with return value handling
|
||||
fail_point!("my_failpoint", |value: Option<String>| {
|
||||
match value {
|
||||
Some(v) => {
|
||||
println!("Got value: {}", v);
|
||||
return Ok(v.parse().unwrap_or_default());
|
||||
}
|
||||
None => return Ok(42), // Default return value
|
||||
}
|
||||
});
|
||||
|
||||
// Conditional failpoint - only check if condition is met
|
||||
let should_fail = some_condition();
|
||||
fail_point!("conditional_failpoint", should_fail, |value: &str| {
|
||||
println!("Conditional failpoint triggered with: {}", value);
|
||||
return Err(anyhow::anyhow!("Simulated failure"));
|
||||
});
|
||||
```
|
||||
|
||||
### `fail_point_with_context!` - Context-Aware Failpoint Macro
|
||||
|
||||
The `fail_point_with_context!` macro has three variants that mirror `fail_point!` but include context:
|
||||
|
||||
1. **Simple with context** - `fail_point_with_context!(name, context)`
|
||||
2. **With context and return handler** - `fail_point_with_context!(name, context, closure)`
|
||||
3. **Conditional with context** - `fail_point_with_context!(name, context, condition, closure)`
|
||||
|
||||
```rust
|
||||
use neon_failpoint::{fail_point_with_context};
|
||||
use std::collections::HashMap;
|
||||
|
||||
let mut context = HashMap::new();
|
||||
context.insert("tenant_id".to_string(), "test_123".to_string());
|
||||
context.insert("operation".to_string(), "backup".to_string());
|
||||
|
||||
// Simple context failpoint
|
||||
fail_point_with_context!("backup_failpoint", &context);
|
||||
|
||||
// Context failpoint with return handler
|
||||
fail_point_with_context!("backup_failpoint", &context, |value: Option<String>| {
|
||||
match value {
|
||||
Some(v) => return Err(anyhow::anyhow!("Backup failed: {}", v)),
|
||||
None => return Err(anyhow::anyhow!("Backup failed")),
|
||||
}
|
||||
});
|
||||
|
||||
// Conditional context failpoint
|
||||
let is_test_tenant = tenant_id.starts_with("test_");
|
||||
fail_point_with_context!("backup_failpoint", &context, is_test_tenant, |value: Option<String>| {
|
||||
// Only triggers for test tenants
|
||||
return Err(anyhow::anyhow!("Test tenant backup failure"));
|
||||
});
|
||||
```
|
||||
|
||||
### Other Utility Macros
|
||||
|
||||
```rust
|
||||
use neon_failpoint::{pausable_failpoint, sleep_millis_async};
|
||||
|
||||
// Pausable failpoint with cancellation
|
||||
let cancel_token = CancellationToken::new();
|
||||
if let Err(()) = pausable_failpoint!("pause_here", &cancel_token).await {
|
||||
println!("Failpoint was cancelled");
|
||||
}
|
||||
|
||||
// Sleep failpoint
|
||||
sleep_millis_async!("sleep_here", &cancel_token).await;
|
||||
|
||||
// Context creation helper
|
||||
let mut context = HashMap::new();
|
||||
context.insert("key1".to_string(), "value1".to_string());
|
||||
context.insert("key2".to_string(), "value2".to_string());
|
||||
```
|
||||
|
||||
### Argument Reference
|
||||
|
||||
- **`name`**: String literal - the name of the failpoint
|
||||
- **`context`**: Expression that evaluates to `&HashMap<String, String>` - context for matching
|
||||
- **`condition`**: Boolean expression - only check failpoint if true
|
||||
- **`closure`**: Closure that handles return values:
|
||||
- For `fail_point!` with closure: receives `Option<String>`
|
||||
- For conditional variants: receives `&str`
|
||||
- For `fail_point_with_context!` with closure: receives `Option<String>`
|
||||
- **`cancel`**: `&CancellationToken` - for cancellation support
|
||||
|
||||
## Migration from `fail` crate
|
||||
|
||||
The library provides a compatibility layer in `libs/utils/src/failpoint_support.rs`. Most existing code should work without changes, but you can migrate to the new async APIs for better performance:
|
||||
|
||||
### Before (with `fail` crate):
|
||||
```rust
|
||||
use utils::failpoint_support::pausable_failpoint;
|
||||
|
||||
// This used spawn_blocking internally
|
||||
pausable_failpoint!("my_failpoint", &cancel_token).await?;
|
||||
```
|
||||
|
||||
### After (with `neon_failpoint`):
|
||||
```rust
|
||||
use neon_failpoint::{failpoint_with_cancellation, FailpointResult};
|
||||
|
||||
// This is fully async
|
||||
match failpoint_with_cancellation("my_failpoint", None, &cancel_token).await {
|
||||
FailpointResult::Continue => {},
|
||||
FailpointResult::Cancelled => return Err(()),
|
||||
FailpointResult::Return(_) => {},
|
||||
}
|
||||
```
|
||||
|
||||
## Environment Variable Support
|
||||
|
||||
Failpoints can be configured via the `FAILPOINTS` environment variable:
|
||||
|
||||
```bash
|
||||
FAILPOINTS="failpoint1=return(42);failpoint2=sleep(1000);failpoint3=exit"
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
The library includes comprehensive tests and examples. Run them with:
|
||||
|
||||
```bash
|
||||
cargo test --features testing
|
||||
cargo run --example context_demo --features testing
|
||||
```
|
||||
|
||||
## HTTP Configuration
|
||||
|
||||
The library integrates with the existing HTTP failpoint configuration API. Send POST requests to `/v1/failpoints` with:
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"name": "my_failpoint",
|
||||
"actions": "return(42)"
|
||||
}
|
||||
]
|
||||
```
|
||||
82
libs/neon_failpoint/examples/context_demo.rs
Normal file
82
libs/neon_failpoint/examples/context_demo.rs
Normal file
@@ -0,0 +1,82 @@
|
||||
use neon_failpoint::{configure_failpoint_with_context, failpoint, FailpointResult};
|
||||
use std::collections::HashMap;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
// Initialize tracing for better output
|
||||
tracing_subscriber::fmt::init();
|
||||
|
||||
// Set up a context-specific failpoint
|
||||
let mut context_matchers = HashMap::new();
|
||||
context_matchers.insert("tenant_id".to_string(), "test_.*".to_string());
|
||||
context_matchers.insert("operation".to_string(), "backup".to_string());
|
||||
|
||||
configure_failpoint_with_context(
|
||||
"backup_operation",
|
||||
"return(simulated_failure)",
|
||||
context_matchers,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// Test with matching context
|
||||
let mut context = HashMap::new();
|
||||
context.insert("tenant_id".to_string(), "test_123".to_string());
|
||||
context.insert("operation".to_string(), "backup".to_string());
|
||||
|
||||
println!("Testing with matching context...");
|
||||
match failpoint("backup_operation", Some(&context)) {
|
||||
either::Either::Left(result) => match result {
|
||||
FailpointResult::Return(value) => {
|
||||
println!("Failpoint triggered with value: {value:?}");
|
||||
}
|
||||
FailpointResult::Continue => {
|
||||
println!("Failpoint not triggered");
|
||||
}
|
||||
FailpointResult::Cancelled => {
|
||||
println!("Failpoint cancelled");
|
||||
}
|
||||
},
|
||||
either::Either::Right(future) => match future.await {
|
||||
FailpointResult::Return(value) => {
|
||||
println!("Failpoint triggered with value: {value:?}");
|
||||
}
|
||||
FailpointResult::Continue => {
|
||||
println!("Failpoint not triggered");
|
||||
}
|
||||
FailpointResult::Cancelled => {
|
||||
println!("Failpoint cancelled");
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
// Test with non-matching context
|
||||
let mut context = HashMap::new();
|
||||
context.insert("tenant_id".to_string(), "prod_456".to_string());
|
||||
context.insert("operation".to_string(), "backup".to_string());
|
||||
|
||||
println!("Testing with non-matching context...");
|
||||
match failpoint("backup_operation", Some(&context)) {
|
||||
either::Either::Left(result) => match result {
|
||||
FailpointResult::Return(value) => {
|
||||
println!("Failpoint triggered with value: {value:?}");
|
||||
}
|
||||
FailpointResult::Continue => {
|
||||
println!("Failpoint not triggered (expected)");
|
||||
}
|
||||
FailpointResult::Cancelled => {
|
||||
println!("Failpoint cancelled");
|
||||
}
|
||||
},
|
||||
either::Either::Right(future) => match future.await {
|
||||
FailpointResult::Return(value) => {
|
||||
println!("Failpoint triggered with value: {value:?}");
|
||||
}
|
||||
FailpointResult::Continue => {
|
||||
println!("Failpoint not triggered (expected)");
|
||||
}
|
||||
FailpointResult::Cancelled => {
|
||||
println!("Failpoint cancelled");
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
1003
libs/neon_failpoint/src/lib.rs
Normal file
1003
libs/neon_failpoint/src/lib.rs
Normal file
File diff suppressed because it is too large
Load Diff
356
libs/neon_failpoint/src/macros.rs
Normal file
356
libs/neon_failpoint/src/macros.rs
Normal file
@@ -0,0 +1,356 @@
|
||||
//! Macros for convenient failpoint usage
|
||||
|
||||
/// Simple failpoint macro - async version that doesn't require spawn_blocking
|
||||
#[macro_export]
|
||||
macro_rules! fail_point {
|
||||
($name:literal) => {{
|
||||
if cfg!(feature = "testing") {
|
||||
match $crate::failpoint($name, None) {
|
||||
$crate::either::Either::Left(result) => {
|
||||
match result {
|
||||
$crate::FailpointResult::Continue => {},
|
||||
$crate::FailpointResult::Return(_) => {
|
||||
panic!("failpoint was configured with return(X) but Rust code does not pass a closure to map X to a return value");
|
||||
},
|
||||
$crate::FailpointResult::Cancelled => {},
|
||||
}
|
||||
},
|
||||
$crate::either::Either::Right(future) => {
|
||||
match future.await {
|
||||
$crate::FailpointResult::Continue => {},
|
||||
$crate::FailpointResult::Return(_) => {
|
||||
panic!("failpoint was configured with return(X) but Rust code does not pass a closure to map X to a return value");
|
||||
},
|
||||
$crate::FailpointResult::Cancelled => {},
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}};
|
||||
($name:literal, $closure:expr) => {{
|
||||
if cfg!(feature = "testing") {
|
||||
match $crate::failpoint($name, None) {
|
||||
$crate::either::Either::Left(result) => {
|
||||
match result {
|
||||
$crate::FailpointResult::Continue => {},
|
||||
$crate::FailpointResult::Return(value) => {
|
||||
let closure = $closure;
|
||||
return closure(value);
|
||||
},
|
||||
$crate::FailpointResult::Cancelled => {},
|
||||
}
|
||||
},
|
||||
$crate::either::Either::Right(future) => {
|
||||
match future.await {
|
||||
$crate::FailpointResult::Continue => {},
|
||||
$crate::FailpointResult::Return(value) => {
|
||||
let closure = $closure;
|
||||
return closure(value);
|
||||
},
|
||||
$crate::FailpointResult::Cancelled => {},
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}};
|
||||
($name:literal, $condition:expr, $closure:expr) => {{
|
||||
if cfg!(feature = "testing") {
|
||||
if $condition {
|
||||
match $crate::failpoint($name, None) {
|
||||
$crate::either::Either::Left(result) => {
|
||||
match result {
|
||||
$crate::FailpointResult::Continue => {},
|
||||
$crate::FailpointResult::Return(value) => {
|
||||
let closure = $closure;
|
||||
return closure(value);
|
||||
},
|
||||
$crate::FailpointResult::Cancelled => {},
|
||||
}
|
||||
},
|
||||
$crate::either::Either::Right(future) => {
|
||||
match future.await {
|
||||
$crate::FailpointResult::Continue => {},
|
||||
$crate::FailpointResult::Return(value) => {
|
||||
let closure = $closure;
|
||||
return closure(value);
|
||||
},
|
||||
$crate::FailpointResult::Cancelled => {},
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
||||
/// Simple failpoint macro - sync version that panics if async action is triggered
|
||||
#[macro_export]
|
||||
macro_rules! fail_point_sync {
|
||||
($name:literal) => {{
|
||||
if cfg!(feature = "testing") {
|
||||
match $crate::failpoint($name, None) {
|
||||
$crate::either::Either::Left(result) => {
|
||||
match result {
|
||||
$crate::FailpointResult::Continue => {},
|
||||
$crate::FailpointResult::Return(_) => {
|
||||
panic!("failpoint was configured with return(X) but Rust code does not pass a closure to map X to a return value");
|
||||
},
|
||||
$crate::FailpointResult::Cancelled => {},
|
||||
}
|
||||
},
|
||||
$crate::either::Either::Right(_) => {
|
||||
panic!("failpoint '{}' triggered an async action (sleep/pause) but fail_point_sync! was used. Use fail_point! instead.", $name);
|
||||
},
|
||||
}
|
||||
}
|
||||
}};
|
||||
($name:literal, $closure:expr) => {{
|
||||
if cfg!(feature = "testing") {
|
||||
match $crate::failpoint($name, None) {
|
||||
$crate::either::Either::Left(result) => {
|
||||
match result {
|
||||
$crate::FailpointResult::Continue => {},
|
||||
$crate::FailpointResult::Return(value) => {
|
||||
let closure = $closure;
|
||||
return closure(value);
|
||||
},
|
||||
$crate::FailpointResult::Cancelled => {},
|
||||
}
|
||||
},
|
||||
$crate::either::Either::Right(_) => {
|
||||
panic!("failpoint '{}' triggered an async action (sleep/pause) but fail_point_sync! was used. Use fail_point! instead.", $name);
|
||||
},
|
||||
}
|
||||
}
|
||||
}};
|
||||
($name:literal, $condition:expr, $closure:expr) => {{
|
||||
if cfg!(feature = "testing") {
|
||||
if $condition {
|
||||
match $crate::failpoint($name, None) {
|
||||
$crate::either::Either::Left(result) => {
|
||||
match result {
|
||||
$crate::FailpointResult::Continue => {},
|
||||
$crate::FailpointResult::Return(value) => {
|
||||
let closure = $closure;
|
||||
return closure(value);
|
||||
},
|
||||
$crate::FailpointResult::Cancelled => {},
|
||||
}
|
||||
},
|
||||
$crate::either::Either::Right(_) => {
|
||||
panic!("failpoint '{}' triggered an async action (sleep/pause) but fail_point_sync! was used. Use fail_point! instead.", $name);
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
||||
/// Failpoint macro with context support
|
||||
#[macro_export]
|
||||
macro_rules! fail_point_with_context {
|
||||
($name:literal, $context:expr) => {{
|
||||
if cfg!(feature = "testing") {
|
||||
match $crate::failpoint($name, Some($context)) {
|
||||
$crate::either::Either::Left(result) => {
|
||||
match result {
|
||||
$crate::FailpointResult::Continue => {},
|
||||
$crate::FailpointResult::Return(_) => {
|
||||
panic!("failpoint was configured with return(X) but Rust code does not pass a closure to map X to a return value");
|
||||
},
|
||||
$crate::FailpointResult::Cancelled => {},
|
||||
}
|
||||
},
|
||||
$crate::either::Either::Right(future) => {
|
||||
match future.await {
|
||||
$crate::FailpointResult::Continue => {},
|
||||
$crate::FailpointResult::Return(_) => {
|
||||
panic!("failpoint was configured with return(X) but Rust code does not pass a closure to map X to a return value");
|
||||
},
|
||||
$crate::FailpointResult::Cancelled => {},
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}};
|
||||
($name:literal, $context:expr, $closure:expr) => {{
|
||||
if cfg!(feature = "testing") {
|
||||
match $crate::failpoint($name, Some($context)) {
|
||||
$crate::either::Either::Left(result) => {
|
||||
match result {
|
||||
$crate::FailpointResult::Continue => {},
|
||||
$crate::FailpointResult::Return(value) => {
|
||||
let closure = $closure;
|
||||
return closure(value);
|
||||
},
|
||||
$crate::FailpointResult::Cancelled => {},
|
||||
}
|
||||
},
|
||||
$crate::either::Either::Right(future) => {
|
||||
match future.await {
|
||||
$crate::FailpointResult::Continue => {},
|
||||
$crate::FailpointResult::Return(value) => {
|
||||
let closure = $closure;
|
||||
return closure(value);
|
||||
},
|
||||
$crate::FailpointResult::Cancelled => {},
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}};
|
||||
($name:literal, $context:expr, $condition:expr, $closure:expr) => {{
|
||||
if cfg!(feature = "testing") {
|
||||
if $condition {
|
||||
match $crate::failpoint($name, Some($context)) {
|
||||
$crate::either::Either::Left(result) => {
|
||||
match result {
|
||||
$crate::FailpointResult::Continue => {},
|
||||
$crate::FailpointResult::Return(value) => {
|
||||
let closure = $closure;
|
||||
return closure(value);
|
||||
},
|
||||
$crate::FailpointResult::Cancelled => {},
|
||||
}
|
||||
},
|
||||
$crate::either::Either::Right(future) => {
|
||||
match future.await {
|
||||
$crate::FailpointResult::Continue => {},
|
||||
$crate::FailpointResult::Return(value) => {
|
||||
let closure = $closure;
|
||||
return closure(value);
|
||||
},
|
||||
$crate::FailpointResult::Cancelled => {},
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
||||
/// Failpoint macro with context support - sync version
|
||||
#[macro_export]
|
||||
macro_rules! fail_point_with_context_sync {
|
||||
($name:literal, $context:expr) => {{
|
||||
if cfg!(feature = "testing") {
|
||||
match $crate::failpoint($name, Some($context)) {
|
||||
$crate::either::Either::Left(result) => {
|
||||
match result {
|
||||
$crate::FailpointResult::Continue => {},
|
||||
$crate::FailpointResult::Return(_) => {
|
||||
panic!("failpoint was configured with return(X) but Rust code does not pass a closure to map X to a return value");
|
||||
},
|
||||
$crate::FailpointResult::Cancelled => {},
|
||||
}
|
||||
},
|
||||
$crate::either::Either::Right(_) => {
|
||||
panic!("failpoint '{}' triggered an async action (sleep/pause) but fail_point_with_context_sync! was used. Use fail_point_with_context! instead.", $name);
|
||||
},
|
||||
}
|
||||
}
|
||||
}};
|
||||
($name:literal, $context:expr, $closure:expr) => {{
|
||||
if cfg!(feature = "testing") {
|
||||
match $crate::failpoint($name, Some($context)) {
|
||||
$crate::either::Either::Left(result) => {
|
||||
match result {
|
||||
$crate::FailpointResult::Continue => {},
|
||||
$crate::FailpointResult::Return(value) => {
|
||||
let closure = $closure;
|
||||
return closure(value);
|
||||
},
|
||||
$crate::FailpointResult::Cancelled => {},
|
||||
}
|
||||
},
|
||||
$crate::either::Either::Right(_) => {
|
||||
panic!("failpoint '{}' triggered an async action (sleep/pause) but fail_point_with_context_sync! was used. Use fail_point_with_context! instead.", $name);
|
||||
},
|
||||
}
|
||||
}
|
||||
}};
|
||||
($name:literal, $context:expr, $condition:expr, $closure:expr) => {{
|
||||
if cfg!(feature = "testing") {
|
||||
if $condition {
|
||||
match $crate::failpoint($name, Some($context)) {
|
||||
$crate::either::Either::Left(result) => {
|
||||
match result {
|
||||
$crate::FailpointResult::Continue => {},
|
||||
$crate::FailpointResult::Return(value) => {
|
||||
let closure = $closure;
|
||||
return closure(value);
|
||||
},
|
||||
$crate::FailpointResult::Cancelled => {},
|
||||
}
|
||||
},
|
||||
$crate::either::Either::Right(_) => {
|
||||
panic!("failpoint '{}' triggered an async action (sleep/pause) but fail_point_with_context_sync! was used. Use fail_point_with_context! instead.", $name);
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
||||
/// Pausable failpoint macro - equivalent to the old pausable_failpoint
|
||||
#[macro_export]
|
||||
macro_rules! pausable_failpoint {
|
||||
($name:literal) => {{
|
||||
if cfg!(feature = "testing") {
|
||||
let cancel = ::tokio_util::sync::CancellationToken::new();
|
||||
let _ = $crate::pausable_failpoint!($name, &cancel);
|
||||
}
|
||||
}};
|
||||
($name:literal, $cancel:expr) => {{
|
||||
if cfg!(feature = "testing") {
|
||||
::tracing::info!("at failpoint {}", $name); // tests rely on this
|
||||
match $crate::failpoint_with_cancellation($name, None, $cancel) {
|
||||
$crate::either::Either::Left(result) => match result {
|
||||
$crate::FailpointResult::Continue => Ok(()),
|
||||
$crate::FailpointResult::Return(_) => Ok(()),
|
||||
$crate::FailpointResult::Cancelled => Err(()),
|
||||
},
|
||||
$crate::either::Either::Right(future) => match future.await {
|
||||
$crate::FailpointResult::Continue => Ok(()),
|
||||
$crate::FailpointResult::Return(_) => Ok(()),
|
||||
$crate::FailpointResult::Cancelled => Err(()),
|
||||
},
|
||||
}
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
||||
/// Sleep failpoint macro - for async sleep operations
|
||||
#[macro_export]
|
||||
macro_rules! sleep_millis_async {
|
||||
($name:literal) => {{
|
||||
if cfg!(feature = "testing") {
|
||||
match $crate::failpoint($name, None) {
|
||||
$crate::either::Either::Left(_) => {}
|
||||
$crate::either::Either::Right(future) => {
|
||||
future.await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}};
|
||||
($name:literal, $cancel:expr) => {{
|
||||
if cfg!(feature = "testing") {
|
||||
match $crate::failpoint_with_cancellation($name, None, $cancel) {
|
||||
$crate::either::Either::Left(_) => {}
|
||||
$crate::either::Either::Right(future) => {
|
||||
future.await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
||||
// Re-export for convenience
|
||||
pub use fail_point;
|
||||
pub use fail_point_sync;
|
||||
pub use fail_point_with_context;
|
||||
pub use fail_point_with_context_sync;
|
||||
pub use pausable_failpoint;
|
||||
pub use sleep_millis_async;
|
||||
@@ -1,6 +1,5 @@
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fmt::Display;
|
||||
use std::net::IpAddr;
|
||||
use std::str::FromStr;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
@@ -11,7 +10,7 @@ use serde::{Deserialize, Serialize};
|
||||
use utils::id::{NodeId, TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use crate::models::{PageserverUtilization, ShardParameters, TenantConfig, TimelineInfo};
|
||||
use crate::models::{PageserverUtilization, ShardParameters, TenantConfig};
|
||||
use crate::shard::{ShardStripeSize, TenantShardId};
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
@@ -61,11 +60,6 @@ pub struct NodeRegisterRequest {
|
||||
pub listen_https_port: Option<u16>,
|
||||
|
||||
pub availability_zone_id: AvailabilityZone,
|
||||
|
||||
// Reachable IP address of the PS/SK registering, if known.
|
||||
// Hadron Cluster Coordiantor will update the DNS record of the registering node
|
||||
// with this IP address.
|
||||
pub node_ip_addr: Option<IpAddr>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
@@ -132,13 +126,6 @@ pub struct TenantDescribeResponse {
|
||||
pub config: TenantConfig,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct TenantTimelineDescribeResponse {
|
||||
pub shards: Vec<TimelineInfo>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub image_consistent_lsn: Option<Lsn>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct NodeShardResponse {
|
||||
pub node_id: NodeId,
|
||||
@@ -551,39 +538,6 @@ pub struct SafekeeperDescribeResponse {
|
||||
pub scheduling_policy: SkSchedulingPolicy,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
pub struct TimelineSafekeeperPeer {
|
||||
pub node_id: NodeId,
|
||||
pub listen_http_addr: String,
|
||||
pub http_port: i32,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
pub struct SCSafekeeperTimeline {
|
||||
// SC does not know the tenant id.
|
||||
pub timeline_id: TimelineId,
|
||||
pub peers: Vec<NodeId>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
pub struct SCSafekeeperTimelinesResponse {
|
||||
pub timelines: Vec<SCSafekeeperTimeline>,
|
||||
pub safekeeper_peers: Vec<TimelineSafekeeperPeer>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
pub struct SafekeeperTimeline {
|
||||
pub tenant_id: TenantId,
|
||||
pub timeline_id: TimelineId,
|
||||
pub peers: Vec<NodeId>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
pub struct SafekeeperTimelinesResponse {
|
||||
pub timelines: Vec<SafekeeperTimeline>,
|
||||
pub safekeeper_peers: Vec<TimelineSafekeeperPeer>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct SafekeeperSchedulingPolicyRequest {
|
||||
pub scheduling_policy: SkSchedulingPolicy,
|
||||
|
||||
@@ -1622,9 +1622,6 @@ pub struct TimelineInfo {
|
||||
|
||||
/// Whether the timeline is invisible in synthetic size calculations.
|
||||
pub is_invisible: Option<bool>,
|
||||
// HADRON: the largest LSN below which all page updates have been included in the image layers.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub image_consistent_lsn: Option<Lsn>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
|
||||
@@ -31,7 +31,6 @@ pub struct UnreliableWrapper {
|
||||
/* BEGIN_HADRON */
|
||||
// This the probability of failure for each operation, ranged from [0, 100].
|
||||
// The probability is default to 100, which means that all operations will fail.
|
||||
// Storage will fail by probability up to attempts_to_fail times.
|
||||
attempt_failure_probability: u64,
|
||||
/* END_HADRON */
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@ use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
use utils::pageserver_feedback::PageserverFeedback;
|
||||
|
||||
use crate::membership::{Configuration, SafekeeperGeneration};
|
||||
use crate::membership::Configuration;
|
||||
use crate::{ServerInfo, Term};
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
@@ -311,12 +311,3 @@ pub struct PullTimelineResponse {
|
||||
pub safekeeper_host: Option<String>,
|
||||
// TODO: add more fields?
|
||||
}
|
||||
|
||||
/// Response to a timeline locate request.
|
||||
/// Storcon-only API.
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
pub struct TimelineLocateResponse {
|
||||
pub generation: SafekeeperGeneration,
|
||||
pub sk_set: Vec<NodeId>,
|
||||
pub new_sk_set: Option<Vec<NodeId>>,
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@ default = ["rename_noreplace"]
|
||||
rename_noreplace = []
|
||||
# Enables test-only APIs, incuding failpoints. In particular, enables the `fail_point!` macro,
|
||||
# which adds some runtime cost to run tests on outage conditions
|
||||
testing = ["fail/failpoints"]
|
||||
testing = ["neon_failpoint/testing"]
|
||||
|
||||
[dependencies]
|
||||
arc-swap.workspace = true
|
||||
@@ -21,10 +21,11 @@ bytes.workspace = true
|
||||
camino.workspace = true
|
||||
chrono.workspace = true
|
||||
diatomic-waker.workspace = true
|
||||
either.workspace = true
|
||||
git-version.workspace = true
|
||||
hex = { workspace = true, features = ["serde"] }
|
||||
humantime.workspace = true
|
||||
fail.workspace = true
|
||||
neon_failpoint.workspace = true
|
||||
futures = { workspace = true }
|
||||
jsonwebtoken.workspace = true
|
||||
nix = { workspace = true, features = ["ioctl"] }
|
||||
|
||||
@@ -47,7 +47,6 @@ where
|
||||
|
||||
/* BEGIN_HADRON */
|
||||
pub enum DeploymentMode {
|
||||
Local,
|
||||
Dev,
|
||||
Staging,
|
||||
Prod,
|
||||
@@ -65,7 +64,7 @@ pub fn get_deployment_mode() -> Option<DeploymentMode> {
|
||||
}
|
||||
},
|
||||
Err(_) => {
|
||||
// tracing::error!("DEPLOYMENT_MODE not set");
|
||||
tracing::error!("DEPLOYMENT_MODE not set");
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,59 +1,22 @@
|
||||
//! Failpoint support code shared between pageserver and safekeepers.
|
||||
//!
|
||||
//! This module provides a compatibility layer over the new neon_failpoint crate.
|
||||
|
||||
pub use neon_failpoint::{configure_failpoint as apply_failpoint, has_failpoints, init};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
/// Declare a failpoint that can use to `pause` failpoint action.
|
||||
/// We don't want to block the executor thread, hence, spawn_blocking + await.
|
||||
///
|
||||
/// Optionally pass a cancellation token, and this failpoint will drop out of
|
||||
/// its pause when the cancellation token fires. This is useful for testing
|
||||
/// cases where we would like to block something, but test its clean shutdown behavior.
|
||||
/// The macro evaluates to a Result in that case, where Ok(()) is the case
|
||||
/// where the failpoint was not paused, and Err() is the case where cancellation
|
||||
/// token fired while evaluating the failpoint.
|
||||
///
|
||||
/// Remember to unpause the failpoint in the test; until that happens, one of the
|
||||
/// limited number of spawn_blocking thread pool threads is leaked.
|
||||
/// Mere forward to neon_failpoint::pausable_failpoint
|
||||
#[macro_export]
|
||||
macro_rules! pausable_failpoint {
|
||||
($name:literal) => {{
|
||||
if cfg!(feature = "testing") {
|
||||
let cancel = ::tokio_util::sync::CancellationToken::new();
|
||||
let _ = $crate::pausable_failpoint!($name, &cancel);
|
||||
}
|
||||
}};
|
||||
($name:literal, $cancel:expr) => {{
|
||||
if cfg!(feature = "testing") {
|
||||
let failpoint_fut = ::tokio::task::spawn_blocking({
|
||||
let current = ::tracing::Span::current();
|
||||
move || {
|
||||
let _entered = current.entered();
|
||||
::tracing::info!("at failpoint {}", $name);
|
||||
::fail::fail_point!($name);
|
||||
}
|
||||
});
|
||||
let cancel_fut = async move {
|
||||
$cancel.cancelled().await;
|
||||
};
|
||||
::tokio::select! {
|
||||
res = failpoint_fut => {
|
||||
res.expect("spawn_blocking");
|
||||
// continue with execution
|
||||
Ok(())
|
||||
},
|
||||
_ = cancel_fut => {
|
||||
Err(())
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}};
|
||||
($name:literal) => {
|
||||
::neon_failpoint::pausable_failpoint!($name)
|
||||
};
|
||||
($name:literal, $cancel:expr) => {
|
||||
::neon_failpoint::pausable_failpoint!($name, $cancel)
|
||||
};
|
||||
}
|
||||
|
||||
pub use pausable_failpoint;
|
||||
|
||||
/// use with fail::cfg("$name", "return(2000)")
|
||||
/// DEPRECATED! - use with fail::cfg("$name", "return(2000)")
|
||||
///
|
||||
/// The effect is similar to a "sleep(2000)" action, i.e. we sleep for the
|
||||
/// specified time (in milliseconds). The main difference is that we use async
|
||||
@@ -69,7 +32,7 @@ macro_rules! __failpoint_sleep_millis_async {
|
||||
// If the failpoint is used with a "return" action, set should_sleep to the
|
||||
// returned value (as string). Otherwise it's set to None.
|
||||
let should_sleep = (|| {
|
||||
::fail::fail_point!($name, |x| x);
|
||||
::neon_failpoint::fail_point_sync!($name, |x| x);
|
||||
::std::option::Option::None
|
||||
})();
|
||||
|
||||
@@ -82,7 +45,7 @@ macro_rules! __failpoint_sleep_millis_async {
|
||||
// If the failpoint is used with a "return" action, set should_sleep to the
|
||||
// returned value (as string). Otherwise it's set to None.
|
||||
let should_sleep = (|| {
|
||||
::fail::fail_point!($name, |x| x);
|
||||
::neon_failpoint::fail_point_sync!($name, |x| x);
|
||||
::std::option::Option::None
|
||||
})();
|
||||
|
||||
@@ -126,60 +89,3 @@ pub async fn failpoint_sleep_cancellable_helper(
|
||||
tokio::time::timeout(d, cancel.cancelled()).await.ok();
|
||||
tracing::info!("failpoint {:?}: sleep done", name);
|
||||
}
|
||||
|
||||
/// Initialize the configured failpoints
|
||||
///
|
||||
/// You must call this function before any concurrent threads do operations.
|
||||
pub fn init() -> fail::FailScenario<'static> {
|
||||
// The failpoints lib provides support for parsing the `FAILPOINTS` env var.
|
||||
// We want non-default behavior for `exit`, though, so, we handle it separately.
|
||||
//
|
||||
// Format for FAILPOINTS is "name=actions" separated by ";".
|
||||
let actions = std::env::var("FAILPOINTS");
|
||||
if actions.is_ok() {
|
||||
// SAFETY: this function should before any threads start and access env vars concurrently
|
||||
unsafe {
|
||||
std::env::remove_var("FAILPOINTS");
|
||||
}
|
||||
} else {
|
||||
// let the library handle non-utf8, or nothing for not present
|
||||
}
|
||||
|
||||
let scenario = fail::FailScenario::setup();
|
||||
|
||||
if let Ok(val) = actions {
|
||||
val.split(';')
|
||||
.enumerate()
|
||||
.map(|(i, s)| s.split_once('=').ok_or((i, s)))
|
||||
.for_each(|res| {
|
||||
let (name, actions) = match res {
|
||||
Ok(t) => t,
|
||||
Err((i, s)) => {
|
||||
panic!(
|
||||
"startup failpoints: missing action on the {}th failpoint; try `{s}=return`",
|
||||
i + 1,
|
||||
);
|
||||
}
|
||||
};
|
||||
if let Err(e) = apply_failpoint(name, actions) {
|
||||
panic!("startup failpoints: failed to apply failpoint {name}={actions}: {e}");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
scenario
|
||||
}
|
||||
|
||||
pub fn apply_failpoint(name: &str, actions: &str) -> Result<(), String> {
|
||||
if actions == "exit" {
|
||||
fail::cfg_callback(name, exit_failpoint)
|
||||
} else {
|
||||
fail::cfg(name, actions)
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn exit_failpoint() {
|
||||
tracing::info!("Exit requested by failpoint");
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
@@ -1,73 +0,0 @@
|
||||
use std::env::{VarError, var};
|
||||
use std::error::Error;
|
||||
use std::net::IpAddr;
|
||||
use std::str::FromStr;
|
||||
|
||||
/// Name of the environment variable containing the reachable IP address of the node. If set, the IP address contained in this
|
||||
/// environment variable is used as the reachable IP address of the pageserver or safekeeper node during node registration.
|
||||
/// In a Kubernetes environment, this environment variable should be set by Kubernetes to the Pod IP (specified in the Pod
|
||||
/// template).
|
||||
pub const HADRON_NODE_IP_ADDRESS: &str = "HADRON_NODE_IP_ADDRESS";
|
||||
|
||||
/// Read the reachable IP address of this page server from env var HADRON_NODE_IP_ADDRESS.
|
||||
/// In Kubernetes this environment variable is set to the Pod IP (specified in the Pod template).
|
||||
pub fn read_node_ip_addr_from_env() -> Result<Option<IpAddr>, Box<dyn Error>> {
|
||||
match var(HADRON_NODE_IP_ADDRESS) {
|
||||
Ok(v) => {
|
||||
if let Ok(addr) = IpAddr::from_str(&v) {
|
||||
Ok(Some(addr))
|
||||
} else {
|
||||
Err(format!("Invalid IP address string: {v}. Cannot be parsed as either an IPv4 or an IPv6 address.").into())
|
||||
}
|
||||
}
|
||||
Err(VarError::NotPresent) => Ok(None),
|
||||
Err(e) => Err(e.into()),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::env;
|
||||
use std::net::{Ipv4Addr, Ipv6Addr};
|
||||
|
||||
#[test]
|
||||
fn test_read_node_ip_addr_from_env() {
|
||||
// SAFETY: test code
|
||||
unsafe {
|
||||
// Test with a valid IPv4 address
|
||||
env::set_var(HADRON_NODE_IP_ADDRESS, "192.168.1.1");
|
||||
let result = read_node_ip_addr_from_env().unwrap();
|
||||
assert_eq!(result, Some(IpAddr::V4(Ipv4Addr::new(192, 168, 1, 1))));
|
||||
|
||||
// Test with a valid IPv6 address
|
||||
env::set_var(
|
||||
HADRON_NODE_IP_ADDRESS,
|
||||
"2001:0db8:85a3:0000:0000:8a2e:0370:7334",
|
||||
);
|
||||
}
|
||||
let result = read_node_ip_addr_from_env().unwrap();
|
||||
assert_eq!(
|
||||
result,
|
||||
Some(IpAddr::V6(
|
||||
Ipv6Addr::from_str("2001:0db8:85a3:0000:0000:8a2e:0370:7334").unwrap()
|
||||
))
|
||||
);
|
||||
|
||||
// Test with an invalid IP address
|
||||
// SAFETY: test code
|
||||
unsafe {
|
||||
env::set_var(HADRON_NODE_IP_ADDRESS, "invalid_ip");
|
||||
}
|
||||
let result = read_node_ip_addr_from_env();
|
||||
assert!(result.is_err());
|
||||
|
||||
// Test with no environment variable set
|
||||
// SAFETY: test code
|
||||
unsafe {
|
||||
env::remove_var(HADRON_NODE_IP_ADDRESS);
|
||||
}
|
||||
let result = read_node_ip_addr_from_env().unwrap();
|
||||
assert_eq!(result, None);
|
||||
}
|
||||
}
|
||||
@@ -26,9 +26,6 @@ pub mod auth;
|
||||
// utility functions and helper traits for unified unique id generation/serialization etc.
|
||||
pub mod id;
|
||||
|
||||
// utility functions to obtain reachable IP addresses in PS/SK nodes.
|
||||
pub mod ip_address;
|
||||
|
||||
pub mod shard;
|
||||
|
||||
mod hex;
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use std::future::Future;
|
||||
use std::pin::Pin;
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
|
||||
@@ -8,7 +7,7 @@ use metrics::{IntCounter, IntCounterVec};
|
||||
use once_cell::sync::Lazy;
|
||||
use strum_macros::{EnumString, VariantNames};
|
||||
use tokio::time::Instant;
|
||||
use tracing::{info, warn};
|
||||
use tracing::info;
|
||||
|
||||
/// Logs a critical error, similarly to `tracing::error!`. This will:
|
||||
///
|
||||
@@ -378,11 +377,10 @@ impl std::fmt::Debug for SecretString {
|
||||
///
|
||||
/// TODO: consider upgrading this to a warning, but currently it fires too often.
|
||||
#[inline]
|
||||
pub async fn log_slow<O>(
|
||||
name: &str,
|
||||
threshold: Duration,
|
||||
f: Pin<&mut impl Future<Output = O>>,
|
||||
) -> O {
|
||||
pub async fn log_slow<F, O>(name: &str, threshold: Duration, f: std::pin::Pin<&mut F>) -> O
|
||||
where
|
||||
F: Future<Output = O>,
|
||||
{
|
||||
monitor_slow_future(
|
||||
threshold,
|
||||
threshold, // period = threshold
|
||||
@@ -396,42 +394,16 @@ pub async fn log_slow<O>(
|
||||
if !is_slow {
|
||||
return;
|
||||
}
|
||||
let elapsed = elapsed_total.as_secs_f64();
|
||||
if ready {
|
||||
info!("slow {name} completed after {elapsed:.3}s");
|
||||
info!(
|
||||
"slow {name} completed after {:.3}s",
|
||||
elapsed_total.as_secs_f64()
|
||||
);
|
||||
} else {
|
||||
info!("slow {name} still running after {elapsed:.3}s");
|
||||
}
|
||||
},
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Logs a periodic warning if a future is slow to complete.
|
||||
#[inline]
|
||||
pub async fn warn_slow<O>(
|
||||
name: &str,
|
||||
threshold: Duration,
|
||||
f: Pin<&mut impl Future<Output = O>>,
|
||||
) -> O {
|
||||
monitor_slow_future(
|
||||
threshold,
|
||||
threshold, // period = threshold
|
||||
f,
|
||||
|MonitorSlowFutureCallback {
|
||||
ready,
|
||||
is_slow,
|
||||
elapsed_total,
|
||||
elapsed_since_last_callback: _,
|
||||
}| {
|
||||
if !is_slow {
|
||||
return;
|
||||
}
|
||||
let elapsed = elapsed_total.as_secs_f64();
|
||||
if ready {
|
||||
warn!("slow {name} completed after {elapsed:.3}s");
|
||||
} else {
|
||||
warn!("slow {name} still running after {elapsed:.3}s");
|
||||
info!(
|
||||
"slow {name} still running after {:.3}s",
|
||||
elapsed_total.as_secs_f64()
|
||||
);
|
||||
}
|
||||
},
|
||||
)
|
||||
@@ -444,7 +416,7 @@ pub async fn warn_slow<O>(
|
||||
pub async fn monitor_slow_future<F, O>(
|
||||
threshold: Duration,
|
||||
period: Duration,
|
||||
mut fut: Pin<&mut F>,
|
||||
mut fut: std::pin::Pin<&mut F>,
|
||||
mut cb: impl FnMut(MonitorSlowFutureCallback),
|
||||
) -> O
|
||||
where
|
||||
|
||||
@@ -8,7 +8,7 @@ license.workspace = true
|
||||
default = []
|
||||
# Enables test-only APIs, incuding failpoints. In particular, enables the `fail_point!` macro,
|
||||
# which adds some runtime cost to run tests on outage conditions
|
||||
testing = ["fail/failpoints", "pageserver_api/testing", "wal_decoder/testing", "pageserver_client/testing"]
|
||||
testing = ["neon_failpoint/testing", "pageserver_api/testing", "wal_decoder/testing", "pageserver_client/testing"]
|
||||
|
||||
fuzz-read-path = ["testing"]
|
||||
|
||||
@@ -33,7 +33,7 @@ crc32c.workspace = true
|
||||
either.workspace = true
|
||||
enum-map.workspace = true
|
||||
enumset = { workspace = true, features = ["serde"]}
|
||||
fail.workspace = true
|
||||
neon_failpoint.workspace = true
|
||||
futures.workspace = true
|
||||
hashlink.workspace = true
|
||||
hex.workspace = true
|
||||
|
||||
@@ -1,16 +1,13 @@
|
||||
use std::collections::HashMap;
|
||||
use std::num::NonZero;
|
||||
use std::pin::pin;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use anyhow::anyhow;
|
||||
use arc_swap::ArcSwap;
|
||||
use futures::stream::FuturesUnordered;
|
||||
use futures::{FutureExt as _, StreamExt as _};
|
||||
use tonic::codec::CompressionEncoding;
|
||||
use tracing::{debug, instrument};
|
||||
use utils::logging::warn_slow;
|
||||
use tracing::instrument;
|
||||
|
||||
use crate::pool::{ChannelPool, ClientGuard, ClientPool, StreamGuard, StreamPool};
|
||||
use crate::retry::Retry;
|
||||
@@ -24,40 +21,28 @@ use utils::shard::{ShardCount, ShardIndex, ShardNumber};
|
||||
/// Max number of concurrent clients per channel (i.e. TCP connection). New channels will be spun up
|
||||
/// when full.
|
||||
///
|
||||
/// Normal requests are small, and we don't pipeline them, so we can afford a large number of
|
||||
/// streams per connection.
|
||||
///
|
||||
/// TODO: tune all of these constants, and consider making them configurable.
|
||||
const MAX_CLIENTS_PER_CHANNEL: NonZero<usize> = NonZero::new(64).unwrap();
|
||||
/// TODO: consider separate limits for unary and streaming clients, so we don't fill up channels
|
||||
/// with only streams.
|
||||
const MAX_CLIENTS_PER_CHANNEL: NonZero<usize> = NonZero::new(16).unwrap();
|
||||
|
||||
/// Max number of concurrent bulk GetPage streams per channel (i.e. TCP connection). These use a
|
||||
/// dedicated channel pool with a lower client limit, to avoid TCP-level head-of-line blocking and
|
||||
/// transmission delays. This also concentrates large window sizes on a smaller set of
|
||||
/// streams/connections, presumably reducing memory use.
|
||||
const MAX_BULK_CLIENTS_PER_CHANNEL: NonZero<usize> = NonZero::new(16).unwrap();
|
||||
/// Max number of concurrent unary request clients per shard.
|
||||
const MAX_UNARY_CLIENTS: NonZero<usize> = NonZero::new(64).unwrap();
|
||||
|
||||
/// The batch size threshold at which a GetPage request will use the bulk stream pool.
|
||||
///
|
||||
/// The gRPC initial window size is 64 KB. Each page is 8 KB, so let's avoid increasing the window
|
||||
/// size for the normal stream pool, and route requests for >= 5 pages (>32 KB) to the bulk pool.
|
||||
const BULK_THRESHOLD_BATCH_SIZE: usize = 5;
|
||||
/// Max number of concurrent GetPage streams per shard. The max number of concurrent GetPage
|
||||
/// requests is given by `MAX_STREAMS * MAX_STREAM_QUEUE_DEPTH`.
|
||||
const MAX_STREAMS: NonZero<usize> = NonZero::new(64).unwrap();
|
||||
|
||||
/// The overall request call timeout, including retries and pool acquisition.
|
||||
/// TODO: should we retry forever? Should the caller decide?
|
||||
const CALL_TIMEOUT: Duration = Duration::from_secs(60);
|
||||
/// Max number of pipelined requests per stream.
|
||||
const MAX_STREAM_QUEUE_DEPTH: NonZero<usize> = NonZero::new(2).unwrap();
|
||||
|
||||
/// The per-request (retry attempt) timeout, including any lazy connection establishment.
|
||||
const REQUEST_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
/// Max number of concurrent bulk GetPage streams per shard, used e.g. for prefetches. Because these
|
||||
/// are more throughput-oriented, we have a smaller limit but higher queue depth.
|
||||
const MAX_BULK_STREAMS: NonZero<usize> = NonZero::new(16).unwrap();
|
||||
|
||||
/// The initial request retry backoff duration. The first retry does not back off.
|
||||
/// TODO: use a different backoff for ResourceExhausted (rate limiting)? Needs server support.
|
||||
const BASE_BACKOFF: Duration = Duration::from_millis(5);
|
||||
|
||||
/// The maximum request retry backoff duration.
|
||||
const MAX_BACKOFF: Duration = Duration::from_secs(5);
|
||||
|
||||
/// Threshold and interval for warning about slow operation.
|
||||
const SLOW_THRESHOLD: Duration = Duration::from_secs(3);
|
||||
/// Max number of pipelined requests per bulk stream. These are more throughput-oriented and thus
|
||||
/// get a larger queue depth.
|
||||
const MAX_BULK_STREAM_QUEUE_DEPTH: NonZero<usize> = NonZero::new(4).unwrap();
|
||||
|
||||
/// A rich Pageserver gRPC client for a single tenant timeline. This client is more capable than the
|
||||
/// basic `page_api::Client` gRPC client, and supports:
|
||||
@@ -65,19 +50,10 @@ const SLOW_THRESHOLD: Duration = Duration::from_secs(3);
|
||||
/// * Sharded tenants across multiple Pageservers.
|
||||
/// * Pooling of connections, clients, and streams for efficient resource use.
|
||||
/// * Concurrent use by many callers.
|
||||
/// * Internal handling of GetPage bidirectional streams.
|
||||
/// * Internal handling of GetPage bidirectional streams, with pipelining and error handling.
|
||||
/// * Automatic retries.
|
||||
/// * Observability.
|
||||
///
|
||||
/// The client has dedicated connection/client/stream pools per shard, for resource reuse. These
|
||||
/// pools are unbounded: we allow scaling out as many concurrent streams as needed to serve all
|
||||
/// concurrent callers, which mostly eliminates head-of-line blocking. Idle streams are fairly
|
||||
/// cheap: the server task currently uses 26 KB of memory, so we can comfortably fit 100,000
|
||||
/// concurrent idle streams (2.5 GB memory). The worst case degenerates to the old libpq case with
|
||||
/// one stream per backend, but without the TCP connection overhead. In the common case we expect
|
||||
/// significantly lower stream counts due to stream sharing, driven e.g. by idle backends, LFC hits,
|
||||
/// read coalescing, sharding (backends typically only talk to one shard at a time), etc.
|
||||
///
|
||||
/// TODO: this client does not support base backups or LSN leases, as these are only used by
|
||||
/// compute_ctl. Consider adding this, but LSN leases need concurrent requests on all shards.
|
||||
pub struct PageserverClient {
|
||||
@@ -91,6 +67,8 @@ pub struct PageserverClient {
|
||||
compression: Option<CompressionEncoding>,
|
||||
/// The shards for this tenant.
|
||||
shards: ArcSwap<Shards>,
|
||||
/// The retry configuration.
|
||||
retry: Retry,
|
||||
}
|
||||
|
||||
impl PageserverClient {
|
||||
@@ -116,6 +94,7 @@ impl PageserverClient {
|
||||
auth_token,
|
||||
compression,
|
||||
shards: ArcSwap::new(Arc::new(shards)),
|
||||
retry: Retry,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -163,15 +142,13 @@ impl PageserverClient {
|
||||
&self,
|
||||
req: page_api::CheckRelExistsRequest,
|
||||
) -> tonic::Result<page_api::CheckRelExistsResponse> {
|
||||
debug!("sending request: {req:?}");
|
||||
let resp = Self::with_retries(CALL_TIMEOUT, async |_| {
|
||||
// Relation metadata is only available on shard 0.
|
||||
let mut client = self.shards.load_full().get_zero().client().await?;
|
||||
Self::with_timeout(REQUEST_TIMEOUT, client.check_rel_exists(req)).await
|
||||
})
|
||||
.await?;
|
||||
debug!("received response: {resp:?}");
|
||||
Ok(resp)
|
||||
self.retry
|
||||
.with(async |_| {
|
||||
// Relation metadata is only available on shard 0.
|
||||
let mut client = self.shards.load_full().get_zero().client().await?;
|
||||
client.check_rel_exists(req).await
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
/// Returns the total size of a database, as # of bytes.
|
||||
@@ -180,15 +157,13 @@ impl PageserverClient {
|
||||
&self,
|
||||
req: page_api::GetDbSizeRequest,
|
||||
) -> tonic::Result<page_api::GetDbSizeResponse> {
|
||||
debug!("sending request: {req:?}");
|
||||
let resp = Self::with_retries(CALL_TIMEOUT, async |_| {
|
||||
// Relation metadata is only available on shard 0.
|
||||
let mut client = self.shards.load_full().get_zero().client().await?;
|
||||
Self::with_timeout(REQUEST_TIMEOUT, client.get_db_size(req)).await
|
||||
})
|
||||
.await?;
|
||||
debug!("received response: {resp:?}");
|
||||
Ok(resp)
|
||||
self.retry
|
||||
.with(async |_| {
|
||||
// Relation metadata is only available on shard 0.
|
||||
let mut client = self.shards.load_full().get_zero().client().await?;
|
||||
client.get_db_size(req).await
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
/// Fetches pages. The `request_id` must be unique across all in-flight requests, and the
|
||||
@@ -218,8 +193,6 @@ impl PageserverClient {
|
||||
return Err(tonic::Status::invalid_argument("request attempt must be 0"));
|
||||
}
|
||||
|
||||
debug!("sending request: {req:?}");
|
||||
|
||||
// The shards may change while we're fetching pages. We execute the request using a stable
|
||||
// view of the shards (especially important for requests that span shards), but retry the
|
||||
// top-level (pre-split) request to pick up shard changes. This can lead to unnecessary
|
||||
@@ -228,16 +201,13 @@ impl PageserverClient {
|
||||
//
|
||||
// TODO: the gRPC server and client doesn't yet properly support shard splits. Revisit this
|
||||
// once we figure out how to handle these.
|
||||
let resp = Self::with_retries(CALL_TIMEOUT, async |attempt| {
|
||||
let mut req = req.clone();
|
||||
req.request_id.attempt = attempt as u32;
|
||||
let shards = self.shards.load_full();
|
||||
Self::with_timeout(REQUEST_TIMEOUT, Self::get_page_with_shards(req, &shards)).await
|
||||
})
|
||||
.await?;
|
||||
|
||||
debug!("received response: {resp:?}");
|
||||
Ok(resp)
|
||||
self.retry
|
||||
.with(async |attempt| {
|
||||
let mut req = req.clone();
|
||||
req.request_id.attempt = attempt as u32;
|
||||
Self::get_page_with_shards(req, &self.shards.load_full()).await
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
/// Fetches pages using the given shards. This uses a stable view of the shards, regardless of
|
||||
@@ -276,7 +246,7 @@ impl PageserverClient {
|
||||
req: page_api::GetPageRequest,
|
||||
shard: &Shard,
|
||||
) -> tonic::Result<page_api::GetPageResponse> {
|
||||
let mut stream = shard.stream(Self::is_bulk(&req)).await?;
|
||||
let stream = shard.stream(req.request_class.is_bulk()).await;
|
||||
let resp = stream.send(req.clone()).await?;
|
||||
|
||||
// Convert per-request errors into a tonic::Status.
|
||||
@@ -320,15 +290,13 @@ impl PageserverClient {
|
||||
&self,
|
||||
req: page_api::GetRelSizeRequest,
|
||||
) -> tonic::Result<page_api::GetRelSizeResponse> {
|
||||
debug!("sending request: {req:?}");
|
||||
let resp = Self::with_retries(CALL_TIMEOUT, async |_| {
|
||||
// Relation metadata is only available on shard 0.
|
||||
let mut client = self.shards.load_full().get_zero().client().await?;
|
||||
Self::with_timeout(REQUEST_TIMEOUT, client.get_rel_size(req)).await
|
||||
})
|
||||
.await?;
|
||||
debug!("received response: {resp:?}");
|
||||
Ok(resp)
|
||||
self.retry
|
||||
.with(async |_| {
|
||||
// Relation metadata is only available on shard 0.
|
||||
let mut client = self.shards.load_full().get_zero().client().await?;
|
||||
client.get_rel_size(req).await
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
/// Fetches an SLRU segment.
|
||||
@@ -337,50 +305,13 @@ impl PageserverClient {
|
||||
&self,
|
||||
req: page_api::GetSlruSegmentRequest,
|
||||
) -> tonic::Result<page_api::GetSlruSegmentResponse> {
|
||||
debug!("sending request: {req:?}");
|
||||
let resp = Self::with_retries(CALL_TIMEOUT, async |_| {
|
||||
// SLRU segments are only available on shard 0.
|
||||
let mut client = self.shards.load_full().get_zero().client().await?;
|
||||
Self::with_timeout(REQUEST_TIMEOUT, client.get_slru_segment(req)).await
|
||||
})
|
||||
.await?;
|
||||
debug!("received response: {resp:?}");
|
||||
Ok(resp)
|
||||
}
|
||||
|
||||
/// Runs the given async closure with retries up to the given timeout. Only certain gRPC status
|
||||
/// codes are retried, see [`Retry::should_retry`]. Returns `DeadlineExceeded` on timeout.
|
||||
async fn with_retries<T, F, O>(timeout: Duration, f: F) -> tonic::Result<T>
|
||||
where
|
||||
F: FnMut(usize) -> O, // pass attempt number, starting at 0
|
||||
O: Future<Output = tonic::Result<T>>,
|
||||
{
|
||||
Retry {
|
||||
timeout: Some(timeout),
|
||||
base_backoff: BASE_BACKOFF,
|
||||
max_backoff: MAX_BACKOFF,
|
||||
}
|
||||
.with(f)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Runs the given future with a timeout. Returns `DeadlineExceeded` on timeout.
|
||||
async fn with_timeout<T>(
|
||||
timeout: Duration,
|
||||
f: impl Future<Output = tonic::Result<T>>,
|
||||
) -> tonic::Result<T> {
|
||||
let started = Instant::now();
|
||||
tokio::time::timeout(timeout, f).await.map_err(|_| {
|
||||
tonic::Status::deadline_exceeded(format!(
|
||||
"request timed out after {:.3}s",
|
||||
started.elapsed().as_secs_f64()
|
||||
))
|
||||
})?
|
||||
}
|
||||
|
||||
/// Returns true if the request is considered a bulk request and should use the bulk pool.
|
||||
fn is_bulk(req: &page_api::GetPageRequest) -> bool {
|
||||
req.block_numbers.len() >= BULK_THRESHOLD_BATCH_SIZE
|
||||
self.retry
|
||||
.with(async |_| {
|
||||
// SLRU segments are only available on shard 0.
|
||||
let mut client = self.shards.load_full().get_zero().client().await?;
|
||||
client.get_slru_segment(req).await
|
||||
})
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -509,23 +440,15 @@ impl Shards {
|
||||
}
|
||||
}
|
||||
|
||||
/// A single shard. Has dedicated resource pools with the following structure:
|
||||
/// A single shard. Uses dedicated resource pools with the following structure:
|
||||
///
|
||||
/// * Channel pool: MAX_CLIENTS_PER_CHANNEL.
|
||||
/// * Client pool: unbounded.
|
||||
/// * Stream pool: unbounded.
|
||||
/// * Bulk channel pool: MAX_BULK_CLIENTS_PER_CHANNEL.
|
||||
/// * Channel pool: unbounded.
|
||||
/// * Unary client pool: MAX_UNARY_CLIENTS.
|
||||
/// * Stream client pool: unbounded.
|
||||
/// * Stream pool: MAX_STREAMS and MAX_STREAM_QUEUE_DEPTH.
|
||||
/// * Bulk channel pool: unbounded.
|
||||
/// * Bulk client pool: unbounded.
|
||||
/// * Bulk stream pool: unbounded.
|
||||
///
|
||||
/// We use a separate bulk channel pool with a lower concurrency limit for large batch requests.
|
||||
/// This avoids TCP-level head-of-line blocking, and also concentrates large window sizes on a
|
||||
/// smaller set of streams/connections, which presumably reduces memory use. Neither of these pools
|
||||
/// are bounded, nor do they pipeline requests, so the latency characteristics should be mostly
|
||||
/// similar (except for TCP transmission time).
|
||||
///
|
||||
/// TODO: since we never use bounded pools, we could consider removing the pool limiters. However,
|
||||
/// the code is fairly trivial, so we may as well keep them around for now in case we need them.
|
||||
/// * Bulk stream pool: MAX_BULK_STREAMS and MAX_BULK_STREAM_QUEUE_DEPTH.
|
||||
struct Shard {
|
||||
/// The shard ID.
|
||||
id: ShardIndex,
|
||||
@@ -533,7 +456,7 @@ struct Shard {
|
||||
client_pool: Arc<ClientPool>,
|
||||
/// GetPage stream pool.
|
||||
stream_pool: Arc<StreamPool>,
|
||||
/// GetPage stream pool for bulk requests.
|
||||
/// GetPage stream pool for bulk requests, e.g. prefetches.
|
||||
bulk_stream_pool: Arc<StreamPool>,
|
||||
}
|
||||
|
||||
@@ -547,30 +470,50 @@ impl Shard {
|
||||
auth_token: Option<String>,
|
||||
compression: Option<CompressionEncoding>,
|
||||
) -> anyhow::Result<Self> {
|
||||
// Shard pools for unary requests and non-bulk GetPage requests.
|
||||
// Common channel pool for unary and stream requests. Bounded by client/stream pools.
|
||||
let channel_pool = ChannelPool::new(url.clone(), MAX_CLIENTS_PER_CHANNEL)?;
|
||||
|
||||
// Client pool for unary requests.
|
||||
let client_pool = ClientPool::new(
|
||||
ChannelPool::new(url.clone(), MAX_CLIENTS_PER_CHANNEL)?,
|
||||
channel_pool.clone(),
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
shard_id,
|
||||
auth_token.clone(),
|
||||
compression,
|
||||
None, // unbounded
|
||||
Some(MAX_UNARY_CLIENTS),
|
||||
);
|
||||
let stream_pool = StreamPool::new(client_pool.clone(), None); // unbounded
|
||||
|
||||
// Bulk GetPage stream pool for large batches (prefetches, sequential scans, vacuum, etc.).
|
||||
// GetPage stream pool. Uses a dedicated client pool to avoid starving out unary clients,
|
||||
// but shares a channel pool with it (as it's unbounded).
|
||||
let stream_pool = StreamPool::new(
|
||||
ClientPool::new(
|
||||
channel_pool.clone(),
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
shard_id,
|
||||
auth_token.clone(),
|
||||
compression,
|
||||
None, // unbounded, limited by stream pool
|
||||
),
|
||||
Some(MAX_STREAMS),
|
||||
MAX_STREAM_QUEUE_DEPTH,
|
||||
);
|
||||
|
||||
// Bulk GetPage stream pool, e.g. for prefetches. Uses dedicated channel/client/stream pools
|
||||
// to avoid head-of-line blocking of latency-sensitive requests.
|
||||
let bulk_stream_pool = StreamPool::new(
|
||||
ClientPool::new(
|
||||
ChannelPool::new(url, MAX_BULK_CLIENTS_PER_CHANNEL)?,
|
||||
ChannelPool::new(url, MAX_CLIENTS_PER_CHANNEL)?,
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
shard_id,
|
||||
auth_token,
|
||||
compression,
|
||||
None, // unbounded,
|
||||
None, // unbounded, limited by stream pool
|
||||
),
|
||||
None, // unbounded
|
||||
Some(MAX_BULK_STREAMS),
|
||||
MAX_BULK_STREAM_QUEUE_DEPTH,
|
||||
);
|
||||
|
||||
Ok(Self {
|
||||
@@ -582,23 +525,19 @@ impl Shard {
|
||||
}
|
||||
|
||||
/// Returns a pooled client for this shard.
|
||||
#[instrument(skip_all)]
|
||||
async fn client(&self) -> tonic::Result<ClientGuard> {
|
||||
warn_slow(
|
||||
"client pool acquisition",
|
||||
SLOW_THRESHOLD,
|
||||
pin!(self.client_pool.get()),
|
||||
)
|
||||
.await
|
||||
self.client_pool
|
||||
.get()
|
||||
.await
|
||||
.map_err(|err| tonic::Status::internal(format!("failed to get client: {err}")))
|
||||
}
|
||||
|
||||
/// Returns a pooled stream for this shard. If `bulk` is `true`, uses the dedicated bulk pool.
|
||||
#[instrument(skip_all, fields(bulk))]
|
||||
async fn stream(&self, bulk: bool) -> tonic::Result<StreamGuard> {
|
||||
let pool = match bulk {
|
||||
false => &self.stream_pool,
|
||||
true => &self.bulk_stream_pool,
|
||||
};
|
||||
warn_slow("stream pool acquisition", SLOW_THRESHOLD, pin!(pool.get())).await
|
||||
/// Returns a pooled stream for this shard. If `bulk` is `true`, uses the dedicated bulk stream
|
||||
/// pool (e.g. for prefetches).
|
||||
async fn stream(&self, bulk: bool) -> StreamGuard {
|
||||
match bulk {
|
||||
false => self.stream_pool.get().await,
|
||||
true => self.bulk_stream_pool.get().await,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,36 +9,19 @@
|
||||
//!
|
||||
//! * ChannelPool: manages gRPC channels (TCP connections) to a single Pageserver. Multiple clients
|
||||
//! can acquire and use the same channel concurrently (via HTTP/2 stream multiplexing), up to a
|
||||
//! per-channel client limit. Channels are closed immediately when empty, and indirectly rely on
|
||||
//! client/stream idle timeouts.
|
||||
//! per-channel client limit. Channels may be closed when they are no longer used by any clients.
|
||||
//!
|
||||
//! * ClientPool: manages gRPC clients for a single tenant shard. Each client acquires a (shared)
|
||||
//! channel from the ChannelPool for the client's lifetime. A client can only be acquired by a
|
||||
//! single caller at a time, and is returned to the pool when dropped. Idle clients are removed
|
||||
//! from the pool after a while to free up resources.
|
||||
//! single caller at a time, and is returned to the pool when dropped. Idle clients may be removed
|
||||
//! from the pool after some time, to free up the channel.
|
||||
//!
|
||||
//! * StreamPool: manages bidirectional gRPC GetPage streams. Each stream acquires a client from the
|
||||
//! ClientPool for the stream's lifetime. A stream can only be acquired by a single caller at a
|
||||
//! time, and is returned to the pool when dropped. Idle streams are removed from the pool after
|
||||
//! a while to free up resources.
|
||||
//!
|
||||
//! The stream only supports sending a single, synchronous request at a time, and does not support
|
||||
//! pipelining multiple requests from different callers onto the same stream -- instead, we scale
|
||||
//! out concurrent streams to improve throughput. There are many reasons for this design choice:
|
||||
//!
|
||||
//! * It (mostly) eliminates head-of-line blocking. A single stream is processed sequentially by
|
||||
//! a single server task, which may block e.g. on layer downloads, LSN waits, etc.
|
||||
//!
|
||||
//! * Cancellation becomes trivial, by closing the stream. Otherwise, if a caller goes away
|
||||
//! (e.g. because of a timeout), the request would still be processed by the server and block
|
||||
//! requests behind it in the stream. It might even block its own timeout retry.
|
||||
//!
|
||||
//! * Stream scheduling becomes significantly simpler and cheaper.
|
||||
//!
|
||||
//! * Individual callers can still use client-side batching for pipelining.
|
||||
//!
|
||||
//! * Idle streams are cheap. Benchmarks show that an idle GetPage stream takes up about 26 KB
|
||||
//! per stream (2.5 GB for 100,000 streams), so we can afford to scale out.
|
||||
//! ClientPool for the stream's lifetime. Internal streams are not exposed to callers; instead, it
|
||||
//! returns a guard that can be used to send a single request, to properly enforce queue depth and
|
||||
//! route responses. Internally, the pool will reuse or spin up a suitable stream for the request,
|
||||
//! possibly pipelining multiple requests from multiple callers on the same stream (up to some
|
||||
//! queue depth). Idle streams may be removed from the pool after a while to free up the client.
|
||||
//!
|
||||
//! Each channel corresponds to one TCP connection. Each client unary request and each stream
|
||||
//! corresponds to one HTTP/2 stream and server task.
|
||||
@@ -46,31 +29,33 @@
|
||||
//! TODO: error handling (including custom error types).
|
||||
//! TODO: observability.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::num::NonZero;
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use std::pin::Pin;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::{Arc, Mutex, Weak};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use futures::{Stream, StreamExt as _};
|
||||
use tokio::sync::{OwnedSemaphorePermit, Semaphore, watch};
|
||||
use tokio_stream::wrappers::WatchStream;
|
||||
use futures::StreamExt as _;
|
||||
use tokio::sync::mpsc::{Receiver, Sender};
|
||||
use tokio::sync::{OwnedSemaphorePermit, Semaphore, mpsc, oneshot};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tonic::codec::CompressionEncoding;
|
||||
use tonic::transport::{Channel, Endpoint};
|
||||
use tracing::{error, warn};
|
||||
|
||||
use pageserver_page_api as page_api;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::shard::ShardIndex;
|
||||
|
||||
/// Reap clients/streams that have been idle for this long. Channels are reaped immediately when
|
||||
/// empty, and indirectly rely on the client/stream idle timeouts.
|
||||
/// Reap channels/clients/streams that have been idle for this long.
|
||||
///
|
||||
/// A stream's client will be reaped after 2x the idle threshold (first stream the client), but
|
||||
/// that's okay -- if the stream closes abruptly (e.g. due to timeout or cancellation), we want to
|
||||
/// keep its client around in the pool for a while.
|
||||
/// TODO: this is per-pool. For nested pools, it can take up to 3x as long for a TCP connection to
|
||||
/// be reaped. First, we must wait for an idle stream to be reaped, which marks its client as idle.
|
||||
/// Then, we must wait for the idle client to be reaped, which marks its channel as idle. Then, we
|
||||
/// must wait for the idle channel to be reaped. Is that a problem? Maybe not, we just have to
|
||||
/// account for it when setting the reap threshold. Alternatively, we can immediately reap empty
|
||||
/// channels, and/or stream pool clients.
|
||||
const REAP_IDLE_THRESHOLD: Duration = match cfg!(any(test, feature = "testing")) {
|
||||
false => Duration::from_secs(180),
|
||||
true => Duration::from_secs(1), // exercise reaping in tests
|
||||
@@ -98,6 +83,8 @@ pub struct ChannelPool {
|
||||
max_clients_per_channel: NonZero<usize>,
|
||||
/// Open channels.
|
||||
channels: Mutex<BTreeMap<ChannelID, ChannelEntry>>,
|
||||
/// Reaps idle channels.
|
||||
idle_reaper: Reaper,
|
||||
/// Channel ID generator.
|
||||
next_channel_id: AtomicUsize,
|
||||
}
|
||||
@@ -109,6 +96,9 @@ struct ChannelEntry {
|
||||
channel: Channel,
|
||||
/// Number of clients using this channel.
|
||||
clients: usize,
|
||||
/// The channel has been idle (no clients) since this time. None if channel is in use.
|
||||
/// INVARIANT: Some if clients == 0, otherwise None.
|
||||
idle_since: Option<Instant>,
|
||||
}
|
||||
|
||||
impl ChannelPool {
|
||||
@@ -118,12 +108,15 @@ impl ChannelPool {
|
||||
E: TryInto<Endpoint> + Send + Sync + 'static,
|
||||
<E as TryInto<Endpoint>>::Error: std::error::Error + Send + Sync,
|
||||
{
|
||||
Ok(Arc::new(Self {
|
||||
let pool = Arc::new(Self {
|
||||
endpoint: endpoint.try_into()?,
|
||||
max_clients_per_channel,
|
||||
channels: Mutex::default(),
|
||||
idle_reaper: Reaper::new(REAP_IDLE_THRESHOLD, REAP_IDLE_INTERVAL),
|
||||
next_channel_id: AtomicUsize::default(),
|
||||
}))
|
||||
});
|
||||
pool.idle_reaper.spawn(&pool);
|
||||
Ok(pool)
|
||||
}
|
||||
|
||||
/// Acquires a gRPC channel for a client. Multiple clients may acquire the same channel.
|
||||
@@ -144,17 +137,22 @@ impl ChannelPool {
|
||||
let mut channels = self.channels.lock().unwrap();
|
||||
|
||||
// Try to find an existing channel with available capacity. We check entries in BTreeMap
|
||||
// order, to fill up the lower-ordered channels first. The client/stream pools also prefer
|
||||
// clients with lower-ordered channel IDs first. This will cluster clients in lower-ordered
|
||||
// order, to fill up the lower-ordered channels first. The ClientPool also prefers clients
|
||||
// with lower-ordered channel IDs first. This will cluster clients in lower-ordered
|
||||
// channels, and free up higher-ordered channels such that they can be reaped.
|
||||
for (&id, entry) in channels.iter_mut() {
|
||||
assert!(
|
||||
entry.clients <= self.max_clients_per_channel.get(),
|
||||
"channel overflow"
|
||||
);
|
||||
assert_ne!(entry.clients, 0, "empty channel not reaped");
|
||||
assert_eq!(
|
||||
entry.idle_since.is_some(),
|
||||
entry.clients == 0,
|
||||
"incorrect channel idle state"
|
||||
);
|
||||
if entry.clients < self.max_clients_per_channel.get() {
|
||||
entry.clients += 1;
|
||||
entry.idle_since = None;
|
||||
return ChannelGuard {
|
||||
pool: Arc::downgrade(self),
|
||||
id,
|
||||
@@ -171,6 +169,7 @@ impl ChannelPool {
|
||||
let entry = ChannelEntry {
|
||||
channel: channel.clone(),
|
||||
clients: 1, // account for the guard below
|
||||
idle_since: None,
|
||||
};
|
||||
channels.insert(id, entry);
|
||||
|
||||
@@ -182,6 +181,20 @@ impl ChannelPool {
|
||||
}
|
||||
}
|
||||
|
||||
impl Reapable for ChannelPool {
|
||||
/// Reaps channels that have been idle since before the cutoff.
|
||||
fn reap_idle(&self, cutoff: Instant) {
|
||||
self.channels.lock().unwrap().retain(|_, entry| {
|
||||
let Some(idle_since) = entry.idle_since else {
|
||||
assert_ne!(entry.clients, 0, "empty channel not marked idle");
|
||||
return true;
|
||||
};
|
||||
assert_eq!(entry.clients, 0, "idle channel has clients");
|
||||
idle_since >= cutoff
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Tracks a channel acquired from the pool. The owned inner channel can be obtained with `take()`,
|
||||
/// since the gRPC client requires an owned `Channel`.
|
||||
pub struct ChannelGuard {
|
||||
@@ -198,7 +211,7 @@ impl ChannelGuard {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the channel to the pool. The channel is closed when empty.
|
||||
/// Returns the channel to the pool.
|
||||
impl Drop for ChannelGuard {
|
||||
fn drop(&mut self) {
|
||||
let Some(pool) = self.pool.upgrade() else {
|
||||
@@ -207,12 +220,11 @@ impl Drop for ChannelGuard {
|
||||
|
||||
let mut channels = pool.channels.lock().unwrap();
|
||||
let entry = channels.get_mut(&self.id).expect("unknown channel");
|
||||
assert!(entry.idle_since.is_none(), "active channel marked idle");
|
||||
assert!(entry.clients > 0, "channel underflow");
|
||||
entry.clients -= 1;
|
||||
|
||||
// Reap empty channels immediately.
|
||||
if entry.clients == 0 {
|
||||
channels.remove(&self.id);
|
||||
entry.idle_since = Some(Instant::now()); // mark channel as idle
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -241,7 +253,8 @@ pub struct ClientPool {
|
||||
///
|
||||
/// The first client in the map will be acquired next. The map is sorted by client ID, which in
|
||||
/// turn is sorted by its channel ID, such that we prefer acquiring idle clients from
|
||||
/// lower-ordered channels. This allows us to free up and reap higher-ordered channels.
|
||||
/// lower-ordered channels. This allows us to free up and reap higher-numbered channels as idle
|
||||
/// clients are reaped.
|
||||
idle: Mutex<BTreeMap<ClientID, ClientEntry>>,
|
||||
/// Reaps idle clients.
|
||||
idle_reaper: Reaper,
|
||||
@@ -297,7 +310,7 @@ impl ClientPool {
|
||||
/// This is moderately performance-sensitive. It is called for every unary request, but these
|
||||
/// establish a new gRPC stream per request so they're already expensive. GetPage requests use
|
||||
/// the `StreamPool` instead.
|
||||
pub async fn get(self: &Arc<Self>) -> tonic::Result<ClientGuard> {
|
||||
pub async fn get(self: &Arc<Self>) -> anyhow::Result<ClientGuard> {
|
||||
// Acquire a permit if the pool is bounded.
|
||||
let mut permit = None;
|
||||
if let Some(limiter) = self.limiter.clone() {
|
||||
@@ -315,7 +328,7 @@ impl ClientPool {
|
||||
});
|
||||
}
|
||||
|
||||
// Construct a new client.
|
||||
// Slow path: construct a new client.
|
||||
let mut channel_guard = self.channel_pool.get();
|
||||
let client = page_api::Client::new(
|
||||
channel_guard.take(),
|
||||
@@ -324,8 +337,7 @@ impl ClientPool {
|
||||
self.shard_id,
|
||||
self.auth_token.clone(),
|
||||
self.compression,
|
||||
)
|
||||
.map_err(|err| tonic::Status::internal(format!("failed to create client: {err}")))?;
|
||||
)?;
|
||||
|
||||
Ok(ClientGuard {
|
||||
pool: Arc::downgrade(self),
|
||||
@@ -395,187 +407,287 @@ impl Drop for ClientGuard {
|
||||
/// A pool of bidirectional gRPC streams. Currently only used for GetPage streams. Each stream
|
||||
/// acquires a client from the inner `ClientPool` for the stream's lifetime.
|
||||
///
|
||||
/// Individual streams only send a single request at a time, and do not pipeline multiple callers
|
||||
/// onto the same stream. Instead, we scale out the number of concurrent streams. This is primarily
|
||||
/// to eliminate head-of-line blocking. See the module documentation for more details.
|
||||
/// Individual streams are not exposed to callers -- instead, the returned guard can be used to send
|
||||
/// a single request and await the response. Internally, requests are multiplexed across streams and
|
||||
/// channels. This allows proper queue depth enforcement and response routing.
|
||||
///
|
||||
/// TODO: consider making this generic over request and response types; not currently needed.
|
||||
pub struct StreamPool {
|
||||
/// The client pool to acquire clients from. Must be unbounded.
|
||||
client_pool: Arc<ClientPool>,
|
||||
/// Idle pooled streams. Acquired streams are removed from here and returned on drop.
|
||||
/// All pooled streams.
|
||||
///
|
||||
/// The first stream in the map will be acquired next. The map is sorted by stream ID, which is
|
||||
/// equivalent to the client ID and in turn sorted by its channel ID. This way we prefer
|
||||
/// acquiring idle streams from lower-ordered channels, which allows us to free up and reap
|
||||
/// higher-ordered channels.
|
||||
idle: Mutex<BTreeMap<StreamID, StreamEntry>>,
|
||||
/// Limits the max number of concurrent streams. None if the pool is unbounded.
|
||||
/// Incoming requests will be sent over an existing stream with available capacity. If all
|
||||
/// streams are full, a new one is spun up and added to the pool (up to `max_streams`). Each
|
||||
/// stream has an associated Tokio task that processes requests and responses.
|
||||
streams: Mutex<HashMap<StreamID, StreamEntry>>,
|
||||
/// The max number of concurrent streams, or None if unbounded.
|
||||
max_streams: Option<NonZero<usize>>,
|
||||
/// The max number of concurrent requests per stream.
|
||||
max_queue_depth: NonZero<usize>,
|
||||
/// Limits the max number of concurrent requests, given by `max_streams * max_queue_depth`.
|
||||
/// None if the pool is unbounded.
|
||||
limiter: Option<Arc<Semaphore>>,
|
||||
/// Reaps idle streams.
|
||||
idle_reaper: Reaper,
|
||||
/// Stream ID generator.
|
||||
next_stream_id: AtomicUsize,
|
||||
}
|
||||
|
||||
/// The stream ID. Reuses the inner client ID.
|
||||
type StreamID = ClientID;
|
||||
type StreamID = usize;
|
||||
type RequestSender = Sender<(page_api::GetPageRequest, ResponseSender)>;
|
||||
type RequestReceiver = Receiver<(page_api::GetPageRequest, ResponseSender)>;
|
||||
type ResponseSender = oneshot::Sender<tonic::Result<page_api::GetPageResponse>>;
|
||||
|
||||
/// A pooled stream.
|
||||
struct StreamEntry {
|
||||
/// The bidirectional stream.
|
||||
stream: BiStream,
|
||||
/// The time when this stream was last used, i.e. when it was put back into `StreamPool::idle`.
|
||||
idle_since: Instant,
|
||||
}
|
||||
|
||||
/// A bidirectional GetPage stream and its client. Can send requests and receive responses.
|
||||
struct BiStream {
|
||||
/// The owning client. Holds onto the channel slot while the stream is alive.
|
||||
client: ClientGuard,
|
||||
/// Stream for sending requests. Uses a watch channel, so it can only send a single request at a
|
||||
/// time, and the caller must await the response before sending another request. This is
|
||||
/// enforced by `StreamGuard::send`.
|
||||
sender: watch::Sender<page_api::GetPageRequest>,
|
||||
/// Stream for receiving responses.
|
||||
receiver: Pin<Box<dyn Stream<Item = tonic::Result<page_api::GetPageResponse>> + Send>>,
|
||||
/// Sends caller requests to the stream task. The stream task exits when this is dropped.
|
||||
sender: RequestSender,
|
||||
/// Number of in-flight requests on this stream.
|
||||
queue_depth: usize,
|
||||
/// The time when this stream went idle (queue_depth == 0).
|
||||
/// INVARIANT: Some if queue_depth == 0, otherwise None.
|
||||
idle_since: Option<Instant>,
|
||||
}
|
||||
|
||||
impl StreamPool {
|
||||
/// Creates a new stream pool, using the given client pool. It will use up to `max_streams`
|
||||
/// concurrent streams.
|
||||
/// Creates a new stream pool, using the given client pool. It will send up to `max_queue_depth`
|
||||
/// concurrent requests on each stream, and use up to `max_streams` concurrent streams.
|
||||
///
|
||||
/// The client pool must be unbounded. The stream pool will enforce its own limits, and because
|
||||
/// streams are long-lived they can cause persistent starvation if they exhaust the client pool.
|
||||
/// The stream pool should generally have its own dedicated client pool (but it can share a
|
||||
/// channel pool with others since these are always unbounded).
|
||||
pub fn new(client_pool: Arc<ClientPool>, max_streams: Option<NonZero<usize>>) -> Arc<Self> {
|
||||
pub fn new(
|
||||
client_pool: Arc<ClientPool>,
|
||||
max_streams: Option<NonZero<usize>>,
|
||||
max_queue_depth: NonZero<usize>,
|
||||
) -> Arc<Self> {
|
||||
assert!(client_pool.limiter.is_none(), "bounded client pool");
|
||||
let pool = Arc::new(Self {
|
||||
client_pool,
|
||||
idle: Mutex::default(),
|
||||
limiter: max_streams.map(|max_streams| Arc::new(Semaphore::new(max_streams.get()))),
|
||||
streams: Mutex::default(),
|
||||
limiter: max_streams.map(|max_streams| {
|
||||
Arc::new(Semaphore::new(max_streams.get() * max_queue_depth.get()))
|
||||
}),
|
||||
max_streams,
|
||||
max_queue_depth,
|
||||
idle_reaper: Reaper::new(REAP_IDLE_THRESHOLD, REAP_IDLE_INTERVAL),
|
||||
next_stream_id: AtomicUsize::default(),
|
||||
});
|
||||
pool.idle_reaper.spawn(&pool);
|
||||
pool
|
||||
}
|
||||
|
||||
/// Acquires an available stream from the pool, or spins up a new stream if all streams are
|
||||
/// full. Returns a guard that can be used to send requests and await the responses. Blocks if
|
||||
/// the pool is full.
|
||||
/// Acquires an available stream from the pool, or spins up a new stream async if all streams
|
||||
/// are full. Returns a guard that can be used to send a single request on the stream and await
|
||||
/// the response, with queue depth quota already acquired. Blocks if the pool is at capacity
|
||||
/// (i.e. `CLIENT_LIMIT * STREAM_QUEUE_DEPTH` requests in flight).
|
||||
///
|
||||
/// This is very performance-sensitive, as it is on the GetPage hot path.
|
||||
///
|
||||
/// TODO: is a `Mutex<BTreeMap>` performant enough? Will it become too contended? We can't
|
||||
/// trivially use e.g. DashMap or sharding, because we want to pop lower-ordered streams first
|
||||
/// to free up higher-ordered channels.
|
||||
pub async fn get(self: &Arc<Self>) -> tonic::Result<StreamGuard> {
|
||||
/// TODO: this must do something more sophisticated for performance. We want:
|
||||
///
|
||||
/// * Cheap, concurrent access in the common case where we can use a pooled stream.
|
||||
/// * Quick acquisition of pooled streams with available capacity.
|
||||
/// * Prefer streams that belong to lower-numbered channels, to reap idle channels.
|
||||
/// * Prefer filling up existing streams' queue depth before spinning up new streams.
|
||||
/// * Don't hold a lock while spinning up new streams.
|
||||
/// * Allow concurrent clients to join onto streams while they're spun up.
|
||||
/// * Allow spinning up multiple streams concurrently, but don't overshoot limits.
|
||||
///
|
||||
/// For now, we just do something simple but inefficient (linear scan under mutex).
|
||||
pub async fn get(self: &Arc<Self>) -> StreamGuard {
|
||||
// Acquire a permit if the pool is bounded.
|
||||
let mut permit = None;
|
||||
if let Some(limiter) = self.limiter.clone() {
|
||||
permit = Some(limiter.acquire_owned().await.expect("never closed"));
|
||||
}
|
||||
let mut streams = self.streams.lock().unwrap();
|
||||
|
||||
// Fast path: acquire an idle stream from the pool.
|
||||
if let Some((_, entry)) = self.idle.lock().unwrap().pop_first() {
|
||||
return Ok(StreamGuard {
|
||||
pool: Arc::downgrade(self),
|
||||
stream: Some(entry.stream),
|
||||
can_reuse: true,
|
||||
permit,
|
||||
});
|
||||
// Look for a pooled stream with available capacity.
|
||||
for (&id, entry) in streams.iter_mut() {
|
||||
assert!(
|
||||
entry.queue_depth <= self.max_queue_depth.get(),
|
||||
"stream queue overflow"
|
||||
);
|
||||
assert_eq!(
|
||||
entry.idle_since.is_some(),
|
||||
entry.queue_depth == 0,
|
||||
"incorrect stream idle state"
|
||||
);
|
||||
if entry.queue_depth < self.max_queue_depth.get() {
|
||||
entry.queue_depth += 1;
|
||||
entry.idle_since = None;
|
||||
return StreamGuard {
|
||||
pool: Arc::downgrade(self),
|
||||
id,
|
||||
sender: entry.sender.clone(),
|
||||
permit,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Spin up a new stream. Uses a watch channel to send a single request at a time, since
|
||||
// `StreamGuard::send` enforces this anyway and it avoids unnecessary channel overhead.
|
||||
let mut client = self.client_pool.get().await?;
|
||||
// No available stream, spin up a new one. We install the stream entry in the pool first and
|
||||
// return the guard, while spinning up the stream task async. This allows other callers to
|
||||
// join onto this stream and also create additional streams concurrently if this fills up.
|
||||
let id = self.next_stream_id.fetch_add(1, Ordering::Relaxed);
|
||||
let (req_tx, req_rx) = mpsc::channel(self.max_queue_depth.get());
|
||||
let entry = StreamEntry {
|
||||
sender: req_tx.clone(),
|
||||
queue_depth: 1, // reserve quota for this caller
|
||||
idle_since: None,
|
||||
};
|
||||
streams.insert(id, entry);
|
||||
|
||||
let (req_tx, req_rx) = watch::channel(page_api::GetPageRequest::default());
|
||||
let req_stream = WatchStream::from_changes(req_rx);
|
||||
let resp_stream = client.get_pages(req_stream).await?;
|
||||
if let Some(max_streams) = self.max_streams {
|
||||
assert!(streams.len() <= max_streams.get(), "stream overflow");
|
||||
};
|
||||
|
||||
Ok(StreamGuard {
|
||||
let client_pool = self.client_pool.clone();
|
||||
let pool = Arc::downgrade(self);
|
||||
|
||||
tokio::spawn(async move {
|
||||
if let Err(err) = Self::run_stream(client_pool, req_rx).await {
|
||||
error!("stream failed: {err}");
|
||||
}
|
||||
// Remove stream from pool on exit. Weak reference to avoid holding the pool alive.
|
||||
if let Some(pool) = pool.upgrade() {
|
||||
let entry = pool.streams.lock().unwrap().remove(&id);
|
||||
assert!(entry.is_some(), "unknown stream ID: {id}");
|
||||
}
|
||||
});
|
||||
|
||||
StreamGuard {
|
||||
pool: Arc::downgrade(self),
|
||||
stream: Some(BiStream {
|
||||
client,
|
||||
sender: req_tx,
|
||||
receiver: Box::pin(resp_stream),
|
||||
}),
|
||||
can_reuse: true,
|
||||
id,
|
||||
sender: req_tx,
|
||||
permit,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs a stream task. This acquires a client from the `ClientPool` and establishes a
|
||||
/// bidirectional GetPage stream, then forwards requests and responses between callers and the
|
||||
/// stream. It does not track or enforce queue depths -- that's done by `get()` since it must be
|
||||
/// atomic with pool stream acquisition.
|
||||
///
|
||||
/// The task exits when the request channel is closed, or on a stream error. The caller is
|
||||
/// responsible for removing the stream from the pool on exit.
|
||||
async fn run_stream(
|
||||
client_pool: Arc<ClientPool>,
|
||||
mut caller_rx: RequestReceiver,
|
||||
) -> anyhow::Result<()> {
|
||||
// Acquire a client from the pool and create a stream.
|
||||
let mut client = client_pool.get().await?;
|
||||
|
||||
// NB: use an unbounded channel such that the stream send never blocks. Otherwise, we could
|
||||
// theoretically deadlock if both the client and server block on sends (since we're not
|
||||
// reading responses while sending). This is unlikely to happen due to gRPC/TCP buffers and
|
||||
// low queue depths, but it was seen to happen with the libpq protocol so better safe than
|
||||
// sorry. It should never buffer more than the queue depth anyway, but using an unbounded
|
||||
// channel guarantees that it will never block.
|
||||
let (req_tx, req_rx) = mpsc::unbounded_channel();
|
||||
let req_stream = tokio_stream::wrappers::UnboundedReceiverStream::new(req_rx);
|
||||
let mut resp_stream = client.get_pages(req_stream).await?;
|
||||
|
||||
// Track caller response channels by request ID. If the task returns early, these response
|
||||
// channels will be dropped and the waiting callers will receive an error.
|
||||
//
|
||||
// NB: this will leak entries if the server doesn't respond to a request (by request ID).
|
||||
// It shouldn't happen, and if it does it will often hold onto queue depth quota anyway and
|
||||
// block further use. But we could consider reaping closed channels after some time.
|
||||
let mut callers = HashMap::new();
|
||||
|
||||
// Process requests and responses.
|
||||
loop {
|
||||
tokio::select! {
|
||||
// Receive requests from callers and send them to the stream.
|
||||
req = caller_rx.recv() => {
|
||||
// Shut down if request channel is closed.
|
||||
let Some((req, resp_tx)) = req else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
// Store the response channel by request ID.
|
||||
if callers.contains_key(&req.request_id) {
|
||||
// Error on request ID duplicates. Ignore callers that went away.
|
||||
_ = resp_tx.send(Err(tonic::Status::invalid_argument(
|
||||
format!("duplicate request ID: {}", req.request_id),
|
||||
)));
|
||||
continue;
|
||||
}
|
||||
callers.insert(req.request_id, resp_tx);
|
||||
|
||||
// Send the request on the stream. Bail out if the stream is closed.
|
||||
req_tx.send(req).map_err(|_| {
|
||||
tonic::Status::unavailable("stream closed")
|
||||
})?;
|
||||
}
|
||||
|
||||
// Receive responses from the stream and send them to callers.
|
||||
resp = resp_stream.next() => {
|
||||
// Shut down if the stream is closed, and bail out on stream errors.
|
||||
let Some(resp) = resp.transpose()? else {
|
||||
return Ok(())
|
||||
};
|
||||
|
||||
// Send the response to the caller. Ignore errors if the caller went away.
|
||||
let Some(resp_tx) = callers.remove(&resp.request_id) else {
|
||||
warn!("received response for unknown request ID: {}", resp.request_id);
|
||||
continue;
|
||||
};
|
||||
_ = resp_tx.send(Ok(resp));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Reapable for StreamPool {
|
||||
/// Reaps streams that have been idle since before the cutoff.
|
||||
fn reap_idle(&self, cutoff: Instant) {
|
||||
self.idle
|
||||
.lock()
|
||||
.unwrap()
|
||||
.retain(|_, entry| entry.idle_since >= cutoff);
|
||||
self.streams.lock().unwrap().retain(|_, entry| {
|
||||
let Some(idle_since) = entry.idle_since else {
|
||||
assert_ne!(entry.queue_depth, 0, "empty stream not marked idle");
|
||||
return true;
|
||||
};
|
||||
assert_eq!(entry.queue_depth, 0, "idle stream has requests");
|
||||
idle_since >= cutoff
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// A stream acquired from the pool. Returned to the pool when dropped, unless there are still
|
||||
/// in-flight requests on the stream, or the stream failed.
|
||||
/// A pooled stream reference. Can be used to send a single request, to properly enforce queue
|
||||
/// depth. Queue depth is already reserved and will be returned on drop.
|
||||
pub struct StreamGuard {
|
||||
pool: Weak<StreamPool>,
|
||||
stream: Option<BiStream>, // Some until dropped
|
||||
can_reuse: bool, // returned to pool if true
|
||||
id: StreamID,
|
||||
sender: RequestSender,
|
||||
permit: Option<OwnedSemaphorePermit>, // None if pool is unbounded
|
||||
}
|
||||
|
||||
impl StreamGuard {
|
||||
/// Sends a request on the stream and awaits the response. If the future is dropped before it
|
||||
/// resolves (e.g. due to a timeout or cancellation), the stream will be closed to cancel the
|
||||
/// request and is not returned to the pool. The same is true if the stream errors, in which
|
||||
/// case the caller can't send further requests on the stream.
|
||||
/// Sends a request on the stream and awaits the response. Consumes the guard, since it's only
|
||||
/// valid for a single request (to enforce queue depth). This also drops the guard on return and
|
||||
/// returns the queue depth quota to the pool.
|
||||
///
|
||||
/// We only support sending a single request at a time, to eliminate head-of-line blocking. See
|
||||
/// module documentation for details.
|
||||
/// The `GetPageRequest::request_id` must be unique across in-flight requests.
|
||||
///
|
||||
/// NB: errors are often returned as `GetPageResponse::status_code` instead of `tonic::Status`
|
||||
/// to avoid tearing down the stream for per-request errors. Callers must check this.
|
||||
pub async fn send(
|
||||
&mut self,
|
||||
self,
|
||||
req: page_api::GetPageRequest,
|
||||
) -> tonic::Result<page_api::GetPageResponse> {
|
||||
let req_id = req.request_id;
|
||||
let stream = self.stream.as_mut().expect("not dropped");
|
||||
let (resp_tx, resp_rx) = oneshot::channel();
|
||||
|
||||
// Mark the stream as not reusable while the request is in flight. We can't return the
|
||||
// stream to the pool until we receive the response, to avoid head-of-line blocking and
|
||||
// stale responses. Failed streams can't be reused either.
|
||||
if !self.can_reuse {
|
||||
return Err(tonic::Status::internal("stream can't be reused"));
|
||||
}
|
||||
self.can_reuse = false;
|
||||
|
||||
// Send the request and receive the response.
|
||||
//
|
||||
// NB: this uses a watch channel, so it's unsafe to change this code to pipeline requests.
|
||||
stream
|
||||
.sender
|
||||
.send(req)
|
||||
self.sender
|
||||
.send((req, resp_tx))
|
||||
.await
|
||||
.map_err(|_| tonic::Status::unavailable("stream closed"))?;
|
||||
|
||||
let resp = stream
|
||||
.receiver
|
||||
.next()
|
||||
resp_rx
|
||||
.await
|
||||
.ok_or_else(|| tonic::Status::unavailable("stream closed"))??;
|
||||
|
||||
if resp.request_id != req_id {
|
||||
return Err(tonic::Status::internal(format!(
|
||||
"response ID {} does not match request ID {}",
|
||||
resp.request_id, req_id
|
||||
)));
|
||||
}
|
||||
|
||||
// Success, mark the stream as reusable.
|
||||
self.can_reuse = true;
|
||||
|
||||
Ok(resp)
|
||||
.map_err(|_| tonic::Status::unavailable("stream closed"))?
|
||||
}
|
||||
}
|
||||
|
||||
@@ -585,21 +697,26 @@ impl Drop for StreamGuard {
|
||||
return; // pool was dropped
|
||||
};
|
||||
|
||||
// If the stream isn't reusable, it can't be returned to the pool.
|
||||
if !self.can_reuse {
|
||||
return;
|
||||
// Release the queue depth reservation on drop. This can prematurely decrement it if dropped
|
||||
// before the response is received, but that's okay.
|
||||
//
|
||||
// TODO: actually, it's probably not okay. Queue depth release should be moved into the
|
||||
// stream task, such that it continues to account for the queue depth slot until the server
|
||||
// responds. Otherwise, if a slow request times out and keeps blocking the stream, the
|
||||
// server will keep waiting on it and we can pile on subsequent requests (including the
|
||||
// timeout retry) in the same stream and get blocked. But we may also want to avoid blocking
|
||||
// requests on e.g. LSN waits and layer downloads, instead returning early to free up the
|
||||
// stream. Or just scale out streams with a queue depth of 1 to sidestep all head-of-line
|
||||
// blocking. TBD.
|
||||
let mut streams = pool.streams.lock().unwrap();
|
||||
let entry = streams.get_mut(&self.id).expect("unknown stream");
|
||||
assert!(entry.idle_since.is_none(), "active stream marked idle");
|
||||
assert!(entry.queue_depth > 0, "stream queue underflow");
|
||||
entry.queue_depth -= 1;
|
||||
if entry.queue_depth == 0 {
|
||||
entry.idle_since = Some(Instant::now()); // mark stream as idle
|
||||
}
|
||||
|
||||
// Place the idle stream back into the pool.
|
||||
let entry = StreamEntry {
|
||||
stream: self.stream.take().expect("dropped once"),
|
||||
idle_since: Instant::now(),
|
||||
};
|
||||
pool.idle
|
||||
.lock()
|
||||
.unwrap()
|
||||
.insert(entry.stream.client.id, entry);
|
||||
|
||||
_ = self.permit; // returned on drop, referenced for visibility
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use futures::future::pending;
|
||||
use tokio::time::Instant;
|
||||
use tracing::{error, info, warn};
|
||||
|
||||
@@ -9,54 +8,60 @@ use utils::backoff::exponential_backoff_duration;
|
||||
/// A retry handler for Pageserver gRPC requests.
|
||||
///
|
||||
/// This is used instead of backoff::retry for better control and observability.
|
||||
pub struct Retry {
|
||||
/// Timeout across all retry attempts. If None, retries forever.
|
||||
pub timeout: Option<Duration>,
|
||||
/// The initial backoff duration. The first retry does not use a backoff.
|
||||
pub base_backoff: Duration,
|
||||
/// The maximum backoff duration.
|
||||
pub max_backoff: Duration,
|
||||
}
|
||||
pub struct Retry;
|
||||
|
||||
impl Retry {
|
||||
/// Runs the given async closure with timeouts and retries (exponential backoff). Logs errors,
|
||||
/// using the current tracing span for context.
|
||||
/// The per-request timeout.
|
||||
// TODO: tune these, and/or make them configurable. Should we retry forever?
|
||||
const REQUEST_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
/// The total timeout across all attempts
|
||||
const TOTAL_TIMEOUT: Duration = Duration::from_secs(60);
|
||||
/// The initial backoff duration.
|
||||
const BASE_BACKOFF: Duration = Duration::from_millis(10);
|
||||
/// The maximum backoff duration.
|
||||
const MAX_BACKOFF: Duration = Duration::from_secs(10);
|
||||
/// If true, log successful requests. For debugging.
|
||||
const LOG_SUCCESS: bool = false;
|
||||
|
||||
/// Runs the given async closure with timeouts and retries (exponential backoff), passing the
|
||||
/// attempt number starting at 0. Logs errors, using the current tracing span for context.
|
||||
///
|
||||
/// Only certain gRPC status codes are retried, see [`Self::should_retry`].
|
||||
/// Only certain gRPC status codes are retried, see [`Self::should_retry`]. For default
|
||||
/// timeouts, see [`Self::REQUEST_TIMEOUT`] and [`Self::TOTAL_TIMEOUT`].
|
||||
pub async fn with<T, F, O>(&self, mut f: F) -> tonic::Result<T>
|
||||
where
|
||||
F: FnMut(usize) -> O, // pass attempt number, starting at 0
|
||||
F: FnMut(usize) -> O, // takes attempt number, starting at 0
|
||||
O: Future<Output = tonic::Result<T>>,
|
||||
{
|
||||
let started = Instant::now();
|
||||
let deadline = self.timeout.map(|timeout| started + timeout);
|
||||
let deadline = started + Self::TOTAL_TIMEOUT;
|
||||
let mut last_error = None;
|
||||
let mut retries = 0;
|
||||
loop {
|
||||
// Set up a future to wait for the backoff, if any, and run the closure.
|
||||
// Set up a future to wait for the backoff (if any) and run the request with a timeout.
|
||||
let backoff_and_try = async {
|
||||
// NB: sleep() always sleeps 1ms, even when given a 0 argument. See:
|
||||
// https://github.com/tokio-rs/tokio/issues/6866
|
||||
if let Some(backoff) = self.backoff_duration(retries) {
|
||||
if let Some(backoff) = Self::backoff_duration(retries) {
|
||||
tokio::time::sleep(backoff).await;
|
||||
}
|
||||
|
||||
f(retries).await
|
||||
let request_started = Instant::now();
|
||||
tokio::time::timeout(Self::REQUEST_TIMEOUT, f(retries))
|
||||
.await
|
||||
.map_err(|_| {
|
||||
tonic::Status::deadline_exceeded(format!(
|
||||
"request timed out after {:.3}s",
|
||||
request_started.elapsed().as_secs_f64()
|
||||
))
|
||||
})?
|
||||
};
|
||||
|
||||
// Set up a future for the timeout, if any.
|
||||
let timeout = async {
|
||||
match deadline {
|
||||
Some(deadline) => tokio::time::sleep_until(deadline).await,
|
||||
None => pending().await,
|
||||
}
|
||||
};
|
||||
|
||||
// Wait for the backoff and request, or bail out if the timeout is exceeded.
|
||||
// Wait for the backoff and request, or bail out if the total timeout is exceeded.
|
||||
let result = tokio::select! {
|
||||
result = backoff_and_try => result,
|
||||
|
||||
_ = timeout => {
|
||||
_ = tokio::time::sleep_until(deadline) => {
|
||||
let last_error = last_error.unwrap_or_else(|| {
|
||||
tonic::Status::deadline_exceeded(format!(
|
||||
"request timed out after {:.3}s",
|
||||
@@ -74,7 +79,7 @@ impl Retry {
|
||||
match result {
|
||||
// Success, return the result.
|
||||
Ok(result) => {
|
||||
if retries > 0 {
|
||||
if retries > 0 || Self::LOG_SUCCESS {
|
||||
info!(
|
||||
"request succeeded after {retries} retries in {:.3}s",
|
||||
started.elapsed().as_secs_f64(),
|
||||
@@ -107,13 +112,12 @@ impl Retry {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the backoff duration for the given retry attempt, or None for no backoff. The first
|
||||
/// attempt and first retry never backs off, so this returns None for 0 and 1 retries.
|
||||
fn backoff_duration(&self, retries: usize) -> Option<Duration> {
|
||||
/// Returns the backoff duration for the given retry attempt, or None for no backoff.
|
||||
fn backoff_duration(retry: usize) -> Option<Duration> {
|
||||
let backoff = exponential_backoff_duration(
|
||||
(retries as u32).saturating_sub(1), // first retry does not back off
|
||||
self.base_backoff.as_secs_f64(),
|
||||
self.max_backoff.as_secs_f64(),
|
||||
retry as u32,
|
||||
Self::BASE_BACKOFF.as_secs_f64(),
|
||||
Self::MAX_BACKOFF.as_secs_f64(),
|
||||
);
|
||||
(!backoff.is_zero()).then_some(backoff)
|
||||
}
|
||||
|
||||
@@ -49,7 +49,7 @@ impl From<ProtocolError> for tonic::Status {
|
||||
}
|
||||
|
||||
/// The LSN a request should read at.
|
||||
#[derive(Clone, Copy, Debug, Default)]
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct ReadLsn {
|
||||
/// The request's read LSN.
|
||||
pub request_lsn: Lsn,
|
||||
@@ -329,7 +329,7 @@ impl From<GetDbSizeResponse> for proto::GetDbSizeResponse {
|
||||
}
|
||||
|
||||
/// Requests one or more pages.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct GetPageRequest {
|
||||
/// A request ID. Will be included in the response. Should be unique for in-flight requests on
|
||||
/// the stream.
|
||||
@@ -430,13 +430,12 @@ impl From<RequestID> for proto::RequestId {
|
||||
}
|
||||
|
||||
/// A GetPage request class.
|
||||
#[derive(Clone, Copy, Debug, Default, strum_macros::Display)]
|
||||
#[derive(Clone, Copy, Debug, strum_macros::Display)]
|
||||
pub enum GetPageClass {
|
||||
/// Unknown class. For backwards compatibility: used when an older client version sends a class
|
||||
/// that a newer server version has removed.
|
||||
Unknown,
|
||||
/// A normal request. This is the default.
|
||||
#[default]
|
||||
Normal,
|
||||
/// A prefetch request. NB: can only be classified on pg < 18.
|
||||
Prefetch,
|
||||
@@ -444,6 +443,19 @@ pub enum GetPageClass {
|
||||
Background,
|
||||
}
|
||||
|
||||
impl GetPageClass {
|
||||
/// Returns true if this is considered a bulk request (i.e. more throughput-oriented rather than
|
||||
/// latency-sensitive).
|
||||
pub fn is_bulk(&self) -> bool {
|
||||
match self {
|
||||
Self::Unknown => false,
|
||||
Self::Normal => false,
|
||||
Self::Prefetch => true,
|
||||
Self::Background => true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<proto::GetPageClass> for GetPageClass {
|
||||
fn from(pb: proto::GetPageClass) -> Self {
|
||||
match pb {
|
||||
|
||||
@@ -16,7 +16,6 @@ futures.workspace = true
|
||||
hdrhistogram.workspace = true
|
||||
humantime.workspace = true
|
||||
humantime-serde.workspace = true
|
||||
pprof.workspace = true
|
||||
rand.workspace = true
|
||||
reqwest.workspace = true
|
||||
serde.workspace = true
|
||||
|
||||
@@ -1,127 +0,0 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::anyhow;
|
||||
use futures::StreamExt;
|
||||
use tonic::transport::Endpoint;
|
||||
use tracing::info;
|
||||
|
||||
use pageserver_page_api::{GetPageClass, GetPageRequest, GetPageStatusCode, ReadLsn, RelTag};
|
||||
use utils::id::TenantTimelineId;
|
||||
use utils::lsn::Lsn;
|
||||
use utils::shard::ShardIndex;
|
||||
|
||||
/// Starts a large number of idle gRPC GetPage streams.
|
||||
#[derive(clap::Parser)]
|
||||
pub(crate) struct Args {
|
||||
/// The Pageserver to connect to. Must use grpc://.
|
||||
#[clap(long, default_value = "grpc://localhost:51051")]
|
||||
server: String,
|
||||
/// The Pageserver HTTP API.
|
||||
#[clap(long, default_value = "http://localhost:9898")]
|
||||
http_server: String,
|
||||
/// The number of streams to open.
|
||||
#[clap(long, default_value = "100000")]
|
||||
count: usize,
|
||||
/// Number of streams per connection.
|
||||
#[clap(long, default_value = "100")]
|
||||
per_connection: usize,
|
||||
/// Send a single GetPage request on each stream.
|
||||
#[clap(long, default_value_t = false)]
|
||||
send_request: bool,
|
||||
}
|
||||
|
||||
pub(crate) fn main(args: Args) -> anyhow::Result<()> {
|
||||
let rt = tokio::runtime::Builder::new_multi_thread()
|
||||
.enable_all()
|
||||
.build()?;
|
||||
|
||||
rt.block_on(main_impl(args))
|
||||
}
|
||||
|
||||
async fn main_impl(args: Args) -> anyhow::Result<()> {
|
||||
// Discover a tenant and timeline to use.
|
||||
let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new(
|
||||
reqwest::Client::new(),
|
||||
args.http_server.clone(),
|
||||
None,
|
||||
));
|
||||
let timelines: Vec<TenantTimelineId> = crate::util::cli::targets::discover(
|
||||
&mgmt_api_client,
|
||||
crate::util::cli::targets::Spec {
|
||||
limit_to_first_n_targets: Some(1),
|
||||
targets: None,
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
let ttid = timelines
|
||||
.first()
|
||||
.ok_or_else(|| anyhow!("no timelines found"))?;
|
||||
|
||||
// Set up the initial client.
|
||||
let endpoint = Endpoint::from_shared(args.server.clone())?;
|
||||
|
||||
let connect = async || {
|
||||
pageserver_page_api::Client::new(
|
||||
endpoint.connect().await?,
|
||||
ttid.tenant_id,
|
||||
ttid.timeline_id,
|
||||
ShardIndex::unsharded(),
|
||||
None,
|
||||
None,
|
||||
)
|
||||
};
|
||||
|
||||
let mut client = connect().await?;
|
||||
let mut streams = Vec::with_capacity(args.count);
|
||||
|
||||
// Create streams.
|
||||
for i in 0..args.count {
|
||||
if i % 100 == 0 {
|
||||
info!("opened {}/{} streams", i, args.count);
|
||||
}
|
||||
if i % args.per_connection == 0 && i > 0 {
|
||||
client = connect().await?;
|
||||
}
|
||||
|
||||
let (req_tx, req_rx) = tokio::sync::mpsc::unbounded_channel();
|
||||
let req_stream = tokio_stream::wrappers::UnboundedReceiverStream::new(req_rx);
|
||||
let mut resp_stream = client.get_pages(req_stream).await?;
|
||||
|
||||
// Send request if specified.
|
||||
if args.send_request {
|
||||
req_tx.send(GetPageRequest {
|
||||
request_id: 1.into(),
|
||||
request_class: GetPageClass::Normal,
|
||||
read_lsn: ReadLsn {
|
||||
request_lsn: Lsn::MAX,
|
||||
not_modified_since_lsn: Some(Lsn(1)),
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: 1664, // pg_global
|
||||
dbnode: 0, // shared database
|
||||
relnode: 1262, // pg_authid
|
||||
forknum: 0, // init
|
||||
},
|
||||
block_numbers: vec![0],
|
||||
})?;
|
||||
let resp = resp_stream
|
||||
.next()
|
||||
.await
|
||||
.transpose()?
|
||||
.ok_or_else(|| anyhow!("no response"))?;
|
||||
if resp.status_code != GetPageStatusCode::Ok {
|
||||
return Err(anyhow!("{} response", resp.status_code));
|
||||
}
|
||||
}
|
||||
|
||||
// Hold onto streams to avoid closing them.
|
||||
streams.push((req_tx, resp_stream));
|
||||
}
|
||||
|
||||
info!("opened {} streams, sleeping", args.count);
|
||||
|
||||
// Block forever, to hold the idle streams open for inspection.
|
||||
futures::future::pending::<()>().await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,7 +1,4 @@
|
||||
use std::fs::File;
|
||||
|
||||
use clap::Parser;
|
||||
use tracing::info;
|
||||
use utils::logging;
|
||||
|
||||
/// Re-usable pieces of code that aren't CLI-specific.
|
||||
@@ -20,73 +17,38 @@ mod cmd {
|
||||
pub(super) mod aux_files;
|
||||
pub(super) mod basebackup;
|
||||
pub(super) mod getpage_latest_lsn;
|
||||
pub(super) mod idle_streams;
|
||||
pub(super) mod ondemand_download_churn;
|
||||
pub(super) mod trigger_initial_size_calculation;
|
||||
}
|
||||
|
||||
/// Component-level performance test for pageserver.
|
||||
#[derive(clap::Parser)]
|
||||
struct Args {
|
||||
/// Takes a client CPU profile into profile.svg. The benchmark must exit cleanly before it's
|
||||
/// written, e.g. via --runtime.
|
||||
#[arg(long)]
|
||||
profile: bool,
|
||||
|
||||
#[command(subcommand)]
|
||||
subcommand: Subcommand,
|
||||
}
|
||||
|
||||
#[derive(clap::Subcommand)]
|
||||
enum Subcommand {
|
||||
enum Args {
|
||||
Basebackup(cmd::basebackup::Args),
|
||||
GetPageLatestLsn(cmd::getpage_latest_lsn::Args),
|
||||
TriggerInitialSizeCalculation(cmd::trigger_initial_size_calculation::Args),
|
||||
OndemandDownloadChurn(cmd::ondemand_download_churn::Args),
|
||||
AuxFiles(cmd::aux_files::Args),
|
||||
IdleStreams(cmd::idle_streams::Args),
|
||||
}
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
fn main() {
|
||||
logging::init(
|
||||
logging::LogFormat::Plain,
|
||||
logging::TracingErrorLayerEnablement::Disabled,
|
||||
logging::Output::Stderr,
|
||||
)?;
|
||||
)
|
||||
.unwrap();
|
||||
logging::replace_panic_hook_with_tracing_panic_hook().forget();
|
||||
|
||||
let args = Args::parse();
|
||||
|
||||
// Start a CPU profile if requested.
|
||||
let mut profiler = None;
|
||||
if args.profile {
|
||||
profiler = Some(
|
||||
pprof::ProfilerGuardBuilder::default()
|
||||
.frequency(1000)
|
||||
.blocklist(&["libc", "libgcc", "pthread", "vdso"])
|
||||
.build()?,
|
||||
);
|
||||
}
|
||||
|
||||
match args.subcommand {
|
||||
Subcommand::Basebackup(args) => cmd::basebackup::main(args),
|
||||
Subcommand::GetPageLatestLsn(args) => cmd::getpage_latest_lsn::main(args),
|
||||
Subcommand::TriggerInitialSizeCalculation(args) => {
|
||||
match args {
|
||||
Args::Basebackup(args) => cmd::basebackup::main(args),
|
||||
Args::GetPageLatestLsn(args) => cmd::getpage_latest_lsn::main(args),
|
||||
Args::TriggerInitialSizeCalculation(args) => {
|
||||
cmd::trigger_initial_size_calculation::main(args)
|
||||
}
|
||||
Subcommand::OndemandDownloadChurn(args) => cmd::ondemand_download_churn::main(args),
|
||||
Subcommand::AuxFiles(args) => cmd::aux_files::main(args),
|
||||
Subcommand::IdleStreams(args) => cmd::idle_streams::main(args),
|
||||
}?;
|
||||
|
||||
// Generate a CPU flamegraph if requested.
|
||||
if let Some(profiler) = profiler {
|
||||
let report = profiler.report().build()?;
|
||||
drop(profiler); // stop profiling
|
||||
let file = File::create("profile.svg")?;
|
||||
report.flamegraph(file)?;
|
||||
info!("wrote CPU profile flamegraph to profile.svg")
|
||||
Args::OndemandDownloadChurn(args) => cmd::ondemand_download_churn::main(args),
|
||||
Args::AuxFiles(args) => cmd::aux_files::main(args),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ use anyhow::{Context, anyhow};
|
||||
use async_compression::tokio::write::GzipEncoder;
|
||||
use bytes::{BufMut, Bytes, BytesMut};
|
||||
use fail::fail_point;
|
||||
use neon_failpoint as fail;
|
||||
use pageserver_api::key::{Key, rel_block_to_key};
|
||||
use pageserver_api::reltag::{RelTag, SlruKind};
|
||||
use postgres_ffi::pg_constants::{PG_HBA, PGDATA_SPECIAL_FILES};
|
||||
@@ -114,7 +115,7 @@ where
|
||||
// Compute postgres doesn't have any previous WAL files, but the first
|
||||
// record that it's going to write needs to include the LSN of the
|
||||
// previous record (xl_prev). We include prev_record_lsn in the
|
||||
// "neon.signal" file, so that postgres can read it during startup.
|
||||
// "zenith.signal" file, so that postgres can read it during startup.
|
||||
//
|
||||
// We don't keep full history of record boundaries in the page server,
|
||||
// however, only the predecessor of the latest record on each
|
||||
@@ -751,39 +752,34 @@ where
|
||||
|
||||
//
|
||||
// Add generated pg_control file and bootstrap WAL segment.
|
||||
// Also send neon.signal and zenith.signal file with extra bootstrap data.
|
||||
// Also send zenith.signal file with extra bootstrap data.
|
||||
//
|
||||
async fn add_pgcontrol_file(
|
||||
&mut self,
|
||||
pg_control_bytes: Bytes,
|
||||
system_identifier: u64,
|
||||
) -> Result<(), BasebackupError> {
|
||||
// add neon.signal file
|
||||
let mut neon_signal = String::new();
|
||||
// add zenith.signal file
|
||||
let mut zenith_signal = String::new();
|
||||
if self.prev_record_lsn == Lsn(0) {
|
||||
if self.timeline.is_ancestor_lsn(self.lsn) {
|
||||
write!(neon_signal, "PREV LSN: none")
|
||||
write!(zenith_signal, "PREV LSN: none")
|
||||
.map_err(|e| BasebackupError::Server(e.into()))?;
|
||||
} else {
|
||||
write!(neon_signal, "PREV LSN: invalid")
|
||||
write!(zenith_signal, "PREV LSN: invalid")
|
||||
.map_err(|e| BasebackupError::Server(e.into()))?;
|
||||
}
|
||||
} else {
|
||||
write!(neon_signal, "PREV LSN: {}", self.prev_record_lsn)
|
||||
write!(zenith_signal, "PREV LSN: {}", self.prev_record_lsn)
|
||||
.map_err(|e| BasebackupError::Server(e.into()))?;
|
||||
}
|
||||
|
||||
// TODO: Remove zenith.signal once all historical computes have been replaced
|
||||
// ... and thus support the neon.signal file.
|
||||
for signalfilename in ["neon.signal", "zenith.signal"] {
|
||||
self.ar
|
||||
.append(
|
||||
&new_tar_header(signalfilename, neon_signal.len() as u64)?,
|
||||
neon_signal.as_bytes(),
|
||||
)
|
||||
.await
|
||||
.map_err(|e| BasebackupError::Client(e, "add_pgcontrol_file,neon.signal"))?;
|
||||
}
|
||||
self.ar
|
||||
.append(
|
||||
&new_tar_header("zenith.signal", zenith_signal.len() as u64)?,
|
||||
zenith_signal.as_bytes(),
|
||||
)
|
||||
.await
|
||||
.map_err(|e| BasebackupError::Client(e, "add_pgcontrol_file,zenith.signal"))?;
|
||||
|
||||
//send pg_control
|
||||
let header = new_tar_header("global/pg_control", pg_control_bytes.len() as u64)?;
|
||||
|
||||
@@ -68,7 +68,7 @@ const FEATURES: &[&str] = &[
|
||||
fn version() -> String {
|
||||
format!(
|
||||
"{GIT_VERSION} failpoints: {}, features: {:?}",
|
||||
fail::has_failpoints(),
|
||||
neon_failpoint::has_failpoints(),
|
||||
FEATURES,
|
||||
)
|
||||
}
|
||||
@@ -84,7 +84,7 @@ fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
|
||||
// Initialize up failpoints support
|
||||
let scenario = failpoint_support::init();
|
||||
failpoint_support::init().unwrap();
|
||||
|
||||
let workdir = arg_matches
|
||||
.get_one::<String>("workdir")
|
||||
@@ -221,7 +221,6 @@ fn main() -> anyhow::Result<()> {
|
||||
|
||||
start_pageserver(launch_ts, conf, ignored, otel_guard).context("Failed to start pageserver")?;
|
||||
|
||||
scenario.teardown();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -366,16 +365,9 @@ fn start_pageserver(
|
||||
|
||||
// If any failpoints were set from FAILPOINTS environment variable,
|
||||
// print them to the log for debugging purposes
|
||||
let failpoints = fail::list();
|
||||
if !failpoints.is_empty() {
|
||||
info!(
|
||||
"started with failpoints: {}",
|
||||
failpoints
|
||||
.iter()
|
||||
.map(|(name, actions)| format!("{name}={actions}"))
|
||||
.collect::<Vec<String>>()
|
||||
.join(";")
|
||||
)
|
||||
let failpoints = neon_failpoint::list();
|
||||
for (name, actions) in failpoints {
|
||||
info!("starting with failpoint: {name} {actions}");
|
||||
}
|
||||
|
||||
// Create and lock PID file. This ensures that there cannot be more than one
|
||||
@@ -917,6 +909,11 @@ async fn create_remote_storage_client(
|
||||
// If `test_remote_failures` is non-zero, wrap the client with a
|
||||
// wrapper that simulates failures.
|
||||
if conf.test_remote_failures > 0 {
|
||||
if !cfg!(feature = "testing") {
|
||||
anyhow::bail!(
|
||||
"test_remote_failures option is not available because pageserver was compiled without the 'testing' feature"
|
||||
);
|
||||
}
|
||||
info!(
|
||||
"Simulating remote failures for first {} attempts of each op",
|
||||
conf.test_remote_failures
|
||||
|
||||
@@ -6,6 +6,8 @@ use camino::{Utf8Path, Utf8PathBuf};
|
||||
use super::{NewMetricsRoot, NewRawMetric, RawMetric};
|
||||
use crate::consumption_metrics::NewMetricsRefRoot;
|
||||
|
||||
use neon_failpoint as fail;
|
||||
|
||||
pub(super) fn read_metrics_from_serde_value(
|
||||
json_value: serde_json::Value,
|
||||
) -> anyhow::Result<Vec<NewRawMetric>> {
|
||||
@@ -129,7 +131,7 @@ pub(super) async fn flush_metrics_to_disk(
|
||||
tempfile.flush()?;
|
||||
tempfile.as_file().sync_all()?;
|
||||
|
||||
fail::fail_point!("before-persist-last-metrics-collected");
|
||||
fail::fail_point_sync!("before-persist-last-metrics-collected");
|
||||
|
||||
drop(tempfile.persist(&*path).map_err(|e| e.error)?);
|
||||
|
||||
|
||||
@@ -194,7 +194,6 @@ impl StorageControllerUpcallApi for StorageControllerUpcallClient {
|
||||
listen_http_port: m.http_port,
|
||||
listen_https_port: m.https_port,
|
||||
availability_zone_id: az_id.expect("Checked above"),
|
||||
node_ip_addr: None,
|
||||
})
|
||||
}
|
||||
Err(e) => {
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use neon_failpoint as fail;
|
||||
use remote_storage::{GenericRemoteStorage, RemotePath, TimeoutOrCancel};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{info, warn};
|
||||
|
||||
@@ -10,7 +10,6 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{Context, Result, anyhow};
|
||||
use bytes::Bytes;
|
||||
use enumset::EnumSet;
|
||||
use futures::future::join_all;
|
||||
use futures::{StreamExt, TryFutureExt};
|
||||
@@ -29,6 +28,7 @@ use http_utils::{RequestExt, RouterBuilder};
|
||||
use humantime::format_rfc3339;
|
||||
use hyper::{Body, Request, Response, StatusCode, Uri, header};
|
||||
use metrics::launch_timestamp::LaunchTimestamp;
|
||||
use neon_failpoint as fail;
|
||||
use pageserver_api::models::virtual_file::IoMode;
|
||||
use pageserver_api::models::{
|
||||
DetachBehavior, DownloadRemoteLayersTaskSpawnRequest, IngestAuxFilesRequest,
|
||||
@@ -47,7 +47,6 @@ use pageserver_api::shard::{ShardCount, TenantShardId};
|
||||
use postgres_ffi::PgMajorVersion;
|
||||
use remote_storage::{DownloadError, GenericRemoteStorage, TimeTravelError};
|
||||
use scopeguard::defer;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::json;
|
||||
use tenant_size_model::svg::SvgBranchKind;
|
||||
use tenant_size_model::{SizeResult, StorageModel};
|
||||
@@ -59,7 +58,6 @@ use utils::auth::SwappableJwtAuth;
|
||||
use utils::generation::Generation;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
use wal_decoder::models::record::NeonWalRecord;
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
use crate::context;
|
||||
@@ -80,7 +78,6 @@ use crate::tenant::remote_timeline_client::{
|
||||
};
|
||||
use crate::tenant::secondary::SecondaryController;
|
||||
use crate::tenant::size::ModelInputs;
|
||||
use crate::tenant::storage_layer::ValuesReconstructState;
|
||||
use crate::tenant::storage_layer::{IoConcurrency, LayerAccessStatsReset, LayerName};
|
||||
use crate::tenant::timeline::layer_manager::LayerManagerLockHolder;
|
||||
use crate::tenant::timeline::offload::{OffloadError, offload_timeline};
|
||||
@@ -401,7 +398,6 @@ async fn build_timeline_info(
|
||||
timeline: &Arc<Timeline>,
|
||||
include_non_incremental_logical_size: bool,
|
||||
force_await_initial_logical_size: bool,
|
||||
include_image_consistent_lsn: bool,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<TimelineInfo> {
|
||||
crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
@@ -426,10 +422,6 @@ async fn build_timeline_info(
|
||||
.await?,
|
||||
);
|
||||
}
|
||||
// HADRON
|
||||
if include_image_consistent_lsn {
|
||||
info.image_consistent_lsn = Some(timeline.compute_image_consistent_lsn().await?);
|
||||
}
|
||||
Ok(info)
|
||||
}
|
||||
|
||||
@@ -519,8 +511,6 @@ async fn build_timeline_info_common(
|
||||
is_invisible: Some(is_invisible),
|
||||
|
||||
walreceiver_status,
|
||||
// HADRON
|
||||
image_consistent_lsn: None,
|
||||
};
|
||||
Ok(info)
|
||||
}
|
||||
@@ -723,8 +713,6 @@ async fn timeline_list_handler(
|
||||
parse_query_param(&request, "include-non-incremental-logical-size")?;
|
||||
let force_await_initial_logical_size: Option<bool> =
|
||||
parse_query_param(&request, "force-await-initial-logical-size")?;
|
||||
let include_image_consistent_lsn: Option<bool> =
|
||||
parse_query_param(&request, "include-image-consistent-lsn")?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
let state = get_state(&request);
|
||||
@@ -745,7 +733,6 @@ async fn timeline_list_handler(
|
||||
&timeline,
|
||||
include_non_incremental_logical_size.unwrap_or(false),
|
||||
force_await_initial_logical_size.unwrap_or(false),
|
||||
include_image_consistent_lsn.unwrap_or(false),
|
||||
&ctx,
|
||||
)
|
||||
.instrument(info_span!("build_timeline_info", timeline_id = %timeline.timeline_id))
|
||||
@@ -774,9 +761,6 @@ async fn timeline_and_offloaded_list_handler(
|
||||
parse_query_param(&request, "include-non-incremental-logical-size")?;
|
||||
let force_await_initial_logical_size: Option<bool> =
|
||||
parse_query_param(&request, "force-await-initial-logical-size")?;
|
||||
let include_image_consistent_lsn: Option<bool> =
|
||||
parse_query_param(&request, "include-image-consistent-lsn")?;
|
||||
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
let state = get_state(&request);
|
||||
@@ -797,7 +781,6 @@ async fn timeline_and_offloaded_list_handler(
|
||||
&timeline,
|
||||
include_non_incremental_logical_size.unwrap_or(false),
|
||||
force_await_initial_logical_size.unwrap_or(false),
|
||||
include_image_consistent_lsn.unwrap_or(false),
|
||||
&ctx,
|
||||
)
|
||||
.instrument(info_span!("build_timeline_info", timeline_id = %timeline.timeline_id))
|
||||
@@ -982,9 +965,6 @@ async fn timeline_detail_handler(
|
||||
parse_query_param(&request, "include-non-incremental-logical-size")?;
|
||||
let force_await_initial_logical_size: Option<bool> =
|
||||
parse_query_param(&request, "force-await-initial-logical-size")?;
|
||||
// HADRON
|
||||
let include_image_consistent_lsn: Option<bool> =
|
||||
parse_query_param(&request, "include-image-consistent-lsn")?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
// Logical size calculation needs downloading.
|
||||
@@ -1005,7 +985,6 @@ async fn timeline_detail_handler(
|
||||
&timeline,
|
||||
include_non_incremental_logical_size.unwrap_or(false),
|
||||
force_await_initial_logical_size.unwrap_or(false),
|
||||
include_image_consistent_lsn.unwrap_or(false),
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
@@ -2712,16 +2691,6 @@ async fn deletion_queue_flush(
|
||||
}
|
||||
}
|
||||
|
||||
/// Try if `GetPage@Lsn` is successful, useful for manual debugging.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
struct GetPageResponse {
|
||||
pub page: Bytes,
|
||||
pub layers_visited: u32,
|
||||
pub delta_layers_visited: u32,
|
||||
pub records: Vec<(Lsn, NeonWalRecord)>,
|
||||
pub img: Option<(Lsn, Bytes)>,
|
||||
}
|
||||
|
||||
async fn getpage_at_lsn_handler(
|
||||
request: Request<Body>,
|
||||
cancel: CancellationToken,
|
||||
@@ -2772,24 +2741,21 @@ async fn getpage_at_lsn_handler_inner(
|
||||
|
||||
// Use last_record_lsn if no lsn is provided
|
||||
let lsn = lsn.unwrap_or_else(|| timeline.get_last_record_lsn());
|
||||
let page = timeline.get(key.0, lsn, &ctx).await?;
|
||||
|
||||
if touch {
|
||||
json_response(StatusCode::OK, ())
|
||||
} else {
|
||||
let mut reconstruct_state = ValuesReconstructState::new_with_debug(IoConcurrency::sequential());
|
||||
let page = timeline.debug_get(key.0, lsn, &ctx, &mut reconstruct_state).await?;
|
||||
let response = GetPageResponse {
|
||||
page,
|
||||
layers_visited: reconstruct_state.get_layers_visited(),
|
||||
delta_layers_visited: reconstruct_state.get_delta_layers_visited(),
|
||||
records: reconstruct_state.debug_state.records.clone(),
|
||||
img: reconstruct_state.debug_state.img.clone(),
|
||||
};
|
||||
|
||||
json_response(StatusCode::OK, response)
|
||||
Result::<_, ApiError>::Ok(
|
||||
Response::builder()
|
||||
.status(StatusCode::OK)
|
||||
.header(header::CONTENT_TYPE, "application/octet-stream")
|
||||
.body(hyper::Body::from(page))
|
||||
.unwrap(),
|
||||
)
|
||||
}
|
||||
}
|
||||
.instrument(info_span!("timeline_debug_get", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
|
||||
.instrument(info_span!("timeline_get", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
|
||||
.await
|
||||
}
|
||||
|
||||
@@ -3678,7 +3644,6 @@ async fn activate_post_import_handler(
|
||||
let timeline_info = build_timeline_info(
|
||||
&timeline, false, // include_non_incremental_logical_size,
|
||||
false, // force_await_initial_logical_size
|
||||
false, // include_image_consistent_lsn
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
@@ -4011,7 +3976,7 @@ pub fn make_router(
|
||||
.get("/profile/cpu", |r| request_span(r, profile_cpu_handler))
|
||||
.get("/profile/heap", |r| request_span(r, profile_heap_handler))
|
||||
.get("/v1/status", |r| api_handler(r, status_handler))
|
||||
.put("/v1/failpoints", |r| {
|
||||
.post("/v1/failpoints", |r| {
|
||||
testing_api_handler("manage failpoints", r, failpoints_handler)
|
||||
})
|
||||
.post("/v1/reload_auth_validation_keys", |r| {
|
||||
@@ -4200,7 +4165,7 @@ pub fn make_router(
|
||||
})
|
||||
.get(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/getpage",
|
||||
|r| testing_api_handler("getpage@lsn", r, getpage_at_lsn_handler),
|
||||
|r| testing_api_handler("getpage@lsn", r, getpage_at_lsn_handler),
|
||||
)
|
||||
.get(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/touchpage",
|
||||
|
||||
@@ -610,13 +610,13 @@ async fn import_file(
|
||||
debug!("imported twophase file");
|
||||
} else if file_path.starts_with("pg_wal") {
|
||||
debug!("found wal file in base section. ignore it");
|
||||
} else if file_path.starts_with("zenith.signal") || file_path.starts_with("neon.signal") {
|
||||
} else if file_path.starts_with("zenith.signal") {
|
||||
// Parse zenith signal file to set correct previous LSN
|
||||
let bytes = read_all_bytes(reader).await?;
|
||||
// neon.signal format is "PREV LSN: prev_lsn"
|
||||
// zenith.signal format is "PREV LSN: prev_lsn"
|
||||
// TODO write serialization and deserialization in the same place.
|
||||
let neon_signal = std::str::from_utf8(&bytes)?.trim();
|
||||
let prev_lsn = match neon_signal {
|
||||
let zenith_signal = std::str::from_utf8(&bytes)?.trim();
|
||||
let prev_lsn = match zenith_signal {
|
||||
"PREV LSN: none" => Lsn(0),
|
||||
"PREV LSN: invalid" => Lsn(0),
|
||||
other => {
|
||||
@@ -624,17 +624,17 @@ async fn import_file(
|
||||
split[1]
|
||||
.trim()
|
||||
.parse::<Lsn>()
|
||||
.context("can't parse neon.signal")?
|
||||
.context("can't parse zenith.signal")?
|
||||
}
|
||||
};
|
||||
|
||||
// neon.signal is not necessarily the last file, that we handle
|
||||
// zenith.signal is not necessarily the last file, that we handle
|
||||
// but it is ok to call `finish_write()`, because final `modification.commit()`
|
||||
// will update lsn once more to the final one.
|
||||
let writer = modification.tline.writer().await;
|
||||
writer.finish_write(prev_lsn);
|
||||
|
||||
debug!("imported neon signal {}", prev_lsn);
|
||||
debug!("imported zenith signal {}", prev_lsn);
|
||||
} else if file_path.starts_with("pg_tblspc") {
|
||||
// TODO Backups exported from neon won't have pg_tblspc, but we will need
|
||||
// this to import arbitrary postgres databases.
|
||||
|
||||
@@ -19,6 +19,7 @@ use futures::future::BoxFuture;
|
||||
use futures::{FutureExt, Stream};
|
||||
use itertools::Itertools;
|
||||
use jsonwebtoken::TokenData;
|
||||
use neon_failpoint as fail;
|
||||
use once_cell::sync::OnceCell;
|
||||
use pageserver_api::config::{
|
||||
GetVectoredConcurrentIo, PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
|
||||
@@ -335,18 +336,21 @@ async fn page_service_conn_main(
|
||||
|
||||
let default_timeout_ms = 10 * 60 * 1000; // 10 minutes by default
|
||||
let socket_timeout_ms = (|| {
|
||||
fail::fail_point!("simulated-bad-compute-connection", |avg_timeout_ms| {
|
||||
// Exponential distribution for simulating
|
||||
// poor network conditions, expect about avg_timeout_ms to be around 15
|
||||
// in tests
|
||||
if let Some(avg_timeout_ms) = avg_timeout_ms {
|
||||
let avg = avg_timeout_ms.parse::<i64>().unwrap() as f32;
|
||||
let u = rand::random::<f32>();
|
||||
((1.0 - u).ln() / (-avg)) as u64
|
||||
} else {
|
||||
default_timeout_ms
|
||||
fail::fail_point_sync!(
|
||||
"simulated-bad-compute-connection",
|
||||
|avg_timeout_ms: Option<String>| {
|
||||
// Exponential distribution for simulating
|
||||
// poor network conditions, expect about avg_timeout_ms to be around 15
|
||||
// in tests
|
||||
if let Some(avg_timeout_ms) = avg_timeout_ms {
|
||||
let avg = avg_timeout_ms.parse::<i64>().unwrap() as f32;
|
||||
let u = rand::random::<f32>();
|
||||
((1.0 - u).ln() / (-avg)) as u64
|
||||
} else {
|
||||
default_timeout_ms
|
||||
}
|
||||
}
|
||||
});
|
||||
);
|
||||
default_timeout_ms
|
||||
})();
|
||||
|
||||
@@ -3043,7 +3047,7 @@ where
|
||||
_pgb: &mut PostgresBackend<IO>,
|
||||
sm: &FeStartupPacket,
|
||||
) -> Result<(), QueryError> {
|
||||
fail::fail_point!("ps::connection-start::startup-packet");
|
||||
fail::fail_point_sync!("ps::connection-start::startup-packet");
|
||||
|
||||
if let FeStartupPacket::StartupMessage { params, .. } = sm {
|
||||
if let Some(app_name) = params.get("application_name") {
|
||||
|
||||
@@ -14,6 +14,7 @@ use crate::{PERF_TRACE_TARGET, ensure_walingest};
|
||||
use anyhow::Context;
|
||||
use bytes::{Buf, Bytes, BytesMut};
|
||||
use enum_map::Enum;
|
||||
use neon_failpoint as fail;
|
||||
use pageserver_api::key::{
|
||||
AUX_FILES_KEY, CHECKPOINT_KEY, CONTROLFILE_KEY, CompactKey, DBDIR_KEY, Key, RelDirExists,
|
||||
TWOPHASEDIR_KEY, dbdir_key_range, rel_block_to_key, rel_dir_to_key, rel_key_range,
|
||||
|
||||
@@ -30,6 +30,7 @@ use enumset::EnumSet;
|
||||
use futures::StreamExt;
|
||||
use futures::stream::FuturesUnordered;
|
||||
use itertools::Itertools as _;
|
||||
use neon_failpoint as fail;
|
||||
use once_cell::sync::Lazy;
|
||||
pub use pageserver_api::models::TenantState;
|
||||
use pageserver_api::models::{self, RelSizeMigration};
|
||||
@@ -3393,13 +3394,7 @@ impl TenantShard {
|
||||
.collect_vec();
|
||||
|
||||
for timeline in timelines {
|
||||
// Include a span with the timeline ID. The parent span already has the tenant ID.
|
||||
let span =
|
||||
info_span!("maybe_freeze_ephemeral_layer", timeline_id = %timeline.timeline_id);
|
||||
timeline
|
||||
.maybe_freeze_ephemeral_layer()
|
||||
.instrument(span)
|
||||
.await;
|
||||
timeline.maybe_freeze_ephemeral_layer().await;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9577,7 +9572,7 @@ mod tests {
|
||||
writer.finish_write(Lsn(0x30));
|
||||
drop(writer);
|
||||
|
||||
fail::cfg(
|
||||
neon_failpoint::configure_failpoint(
|
||||
"flush-layer-before-update-remote-consistent-lsn",
|
||||
"return()",
|
||||
)
|
||||
@@ -12822,40 +12817,6 @@ mod tests {
|
||||
},
|
||||
]
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_get_force_image_creation_lsn() -> anyhow::Result<()> {
|
||||
let tenant_conf = pageserver_api::models::TenantConfig {
|
||||
pitr_interval: Some(Duration::from_secs(7 * 3600)),
|
||||
image_layer_force_creation_period: Some(Duration::from_secs(3600)),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let tenant_id = TenantId::generate();
|
||||
|
||||
let harness = TenantHarness::create_custom(
|
||||
"test_get_force_image_creation_lsn",
|
||||
tenant_conf,
|
||||
tenant_id,
|
||||
ShardIdentity::unsharded(),
|
||||
Generation::new(1),
|
||||
)
|
||||
.await?;
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
let timeline = tenant
|
||||
.create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
|
||||
.await?;
|
||||
timeline.gc_info.write().unwrap().cutoffs.time = Some(Lsn(100));
|
||||
{
|
||||
let writer = timeline.writer().await;
|
||||
writer.finish_write(Lsn(5000));
|
||||
}
|
||||
|
||||
let image_creation_lsn = timeline.get_force_image_creation_lsn().unwrap();
|
||||
assert_eq!(image_creation_lsn, Lsn(4300));
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -46,11 +46,10 @@
|
||||
mod historic_layer_coverage;
|
||||
mod layer_coverage;
|
||||
|
||||
use std::collections::{BTreeMap, HashMap, VecDeque};
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use std::iter::Peekable;
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
use anyhow::Result;
|
||||
use historic_layer_coverage::BufferedHistoricLayerCoverage;
|
||||
@@ -905,103 +904,6 @@ impl LayerMap {
|
||||
max_stacked_deltas
|
||||
}
|
||||
|
||||
/* BEGIN_HADRON */
|
||||
/**
|
||||
* Compute the image consistent LSN, the largest LSN below which all pages have been redone successfully.
|
||||
* It works by first finding the latest image layers and store them into a map. Then for each delta layer,
|
||||
* find all overlapping image layers in order to potentially increase the image LSN in case there are gaps
|
||||
* (e.g., if an image is created at LSN 100 but the delta layer spans LSN [150, 200], then we can increase
|
||||
* image LSN to 150 because there is no WAL record in between).
|
||||
* Finally, the image consistent LSN is computed by taking the minimum of all image layers.
|
||||
*/
|
||||
pub fn compute_image_consistent_lsn(&self, disk_consistent_lsn: Lsn) -> Lsn {
|
||||
struct ImageLayerInfo {
|
||||
// creation LSN of the image layer
|
||||
image_lsn: Lsn,
|
||||
// the current minimum LSN of newer delta layers with overlapping key ranges
|
||||
min_delta_lsn: Lsn,
|
||||
}
|
||||
let started_at = Instant::now();
|
||||
|
||||
let min_l0_deltas_lsn = {
|
||||
let l0_deltas = self.level0_deltas();
|
||||
l0_deltas
|
||||
.iter()
|
||||
.map(|layer| layer.get_lsn_range().start)
|
||||
.min()
|
||||
.unwrap_or(disk_consistent_lsn)
|
||||
};
|
||||
let global_key_range = Key::MIN..Key::MAX;
|
||||
|
||||
// step 1: collect all most recent image layers into a map
|
||||
// map: end key to image_layer_info
|
||||
let mut image_map: BTreeMap<Key, ImageLayerInfo> = BTreeMap::new();
|
||||
for (img_range, img) in self.image_coverage(&global_key_range, disk_consistent_lsn) {
|
||||
let img_lsn = img.map(|layer| layer.get_lsn_range().end).unwrap_or(Lsn(0));
|
||||
image_map.insert(
|
||||
img_range.end,
|
||||
ImageLayerInfo {
|
||||
image_lsn: img_lsn,
|
||||
min_delta_lsn: min_l0_deltas_lsn,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
// step 2: go through all delta layers, and update the image layer info with overlapping
|
||||
// key ranges
|
||||
for layer in self.historic.iter() {
|
||||
if !layer.is_delta {
|
||||
continue;
|
||||
}
|
||||
let delta_key_range = layer.get_key_range();
|
||||
let delta_lsn_range = layer.get_lsn_range();
|
||||
for (img_end_key, img_info) in image_map.range_mut(delta_key_range.start..Key::MAX) {
|
||||
debug_assert!(img_end_key >= &delta_key_range.start);
|
||||
if delta_lsn_range.end > img_info.image_lsn {
|
||||
// the delta layer includes WAL records after the image
|
||||
// it's possibel that the delta layer's start LSN < image LSN, which will be simply ignored by step 3
|
||||
img_info.min_delta_lsn =
|
||||
std::cmp::min(img_info.min_delta_lsn, delta_lsn_range.start);
|
||||
}
|
||||
if img_end_key >= &delta_key_range.end {
|
||||
// we have fully processed all overlapping image layers
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// step 3, go through all image layers and find the image consistent LSN
|
||||
let mut img_consistent_lsn = min_l0_deltas_lsn.checked_sub(Lsn(1)).unwrap();
|
||||
let mut prev_key = Key::MIN;
|
||||
for (img_key, img_info) in image_map {
|
||||
tracing::debug!(
|
||||
"Image layer {:?}:{} has min delta lsn {}",
|
||||
Range {
|
||||
start: prev_key,
|
||||
end: img_key,
|
||||
},
|
||||
img_info.image_lsn,
|
||||
img_info.min_delta_lsn,
|
||||
);
|
||||
let image_lsn = std::cmp::max(
|
||||
img_info.image_lsn,
|
||||
img_info.min_delta_lsn.checked_sub(Lsn(1)).unwrap_or(Lsn(0)),
|
||||
);
|
||||
img_consistent_lsn = std::cmp::min(img_consistent_lsn, image_lsn);
|
||||
prev_key = img_key;
|
||||
}
|
||||
tracing::info!(
|
||||
"computed image_consistent_lsn {} for disk_consistent_lsn {} in {}ms. Processed {} layrs in total.",
|
||||
img_consistent_lsn,
|
||||
disk_consistent_lsn,
|
||||
started_at.elapsed().as_millis(),
|
||||
self.historic.len()
|
||||
);
|
||||
img_consistent_lsn
|
||||
}
|
||||
|
||||
/* END_HADRON */
|
||||
|
||||
/// Return all L0 delta layers
|
||||
pub fn level0_deltas(&self) -> &Vec<Arc<PersistentLayerDesc>> {
|
||||
&self.l0_delta_layers
|
||||
@@ -1677,138 +1579,6 @@ mod tests {
|
||||
LayerVisibilityHint::Visible
|
||||
));
|
||||
}
|
||||
|
||||
/* BEGIN_HADRON */
|
||||
#[test]
|
||||
fn test_compute_image_consistent_lsn() {
|
||||
let mut layer_map = LayerMap::default();
|
||||
|
||||
let disk_consistent_lsn = Lsn(1000);
|
||||
// case 1: empty layer map
|
||||
let image_consistent_lsn = layer_map.compute_image_consistent_lsn(disk_consistent_lsn);
|
||||
assert_eq!(
|
||||
disk_consistent_lsn.checked_sub(Lsn(1)).unwrap(),
|
||||
image_consistent_lsn
|
||||
);
|
||||
|
||||
// case 2: only L0 delta layer
|
||||
{
|
||||
let mut updates = layer_map.batch_update();
|
||||
updates.insert_historic(PersistentLayerDesc::new_test(
|
||||
Key::from_i128(0)..Key::from_i128(100),
|
||||
Lsn(900)..Lsn(990),
|
||||
true,
|
||||
));
|
||||
|
||||
updates.insert_historic(PersistentLayerDesc::new_test(
|
||||
Key::from_i128(0)..Key::from_i128(100),
|
||||
Lsn(850)..Lsn(899),
|
||||
true,
|
||||
));
|
||||
}
|
||||
|
||||
// should use min L0 delta LSN - 1 as image consistent LSN
|
||||
let image_consistent_lsn = layer_map.compute_image_consistent_lsn(disk_consistent_lsn);
|
||||
assert_eq!(Lsn(849), image_consistent_lsn);
|
||||
|
||||
// case 3: 3 images, no L1 delta
|
||||
{
|
||||
let mut updates = layer_map.batch_update();
|
||||
updates.insert_historic(PersistentLayerDesc::new_test(
|
||||
Key::from_i128(0)..Key::from_i128(40),
|
||||
Lsn(100)..Lsn(100),
|
||||
false,
|
||||
));
|
||||
|
||||
updates.insert_historic(PersistentLayerDesc::new_test(
|
||||
Key::from_i128(40)..Key::from_i128(70),
|
||||
Lsn(200)..Lsn(200),
|
||||
false,
|
||||
));
|
||||
|
||||
updates.insert_historic(PersistentLayerDesc::new_test(
|
||||
Key::from_i128(70)..Key::from_i128(100),
|
||||
Lsn(150)..Lsn(150),
|
||||
false,
|
||||
));
|
||||
}
|
||||
// should use min L0 delta LSN - 1 as image consistent LSN
|
||||
let image_consistent_lsn = layer_map.compute_image_consistent_lsn(disk_consistent_lsn);
|
||||
assert_eq!(Lsn(849), image_consistent_lsn);
|
||||
|
||||
// case 4: 3 images with 1 L1 delta
|
||||
{
|
||||
let mut updates = layer_map.batch_update();
|
||||
updates.insert_historic(PersistentLayerDesc::new_test(
|
||||
Key::from_i128(0)..Key::from_i128(50),
|
||||
Lsn(300)..Lsn(350),
|
||||
true,
|
||||
));
|
||||
}
|
||||
let image_consistent_lsn = layer_map.compute_image_consistent_lsn(disk_consistent_lsn);
|
||||
assert_eq!(Lsn(299), image_consistent_lsn);
|
||||
|
||||
// case 5: 3 images with 1 more L1 delta with smaller LSN
|
||||
{
|
||||
let mut updates = layer_map.batch_update();
|
||||
updates.insert_historic(PersistentLayerDesc::new_test(
|
||||
Key::from_i128(50)..Key::from_i128(72),
|
||||
Lsn(200)..Lsn(300),
|
||||
true,
|
||||
));
|
||||
}
|
||||
let image_consistent_lsn = layer_map.compute_image_consistent_lsn(disk_consistent_lsn);
|
||||
assert_eq!(Lsn(199), image_consistent_lsn);
|
||||
|
||||
// case 6: 3 images with more newer L1 deltas (no impact on final results)
|
||||
{
|
||||
let mut updates = layer_map.batch_update();
|
||||
updates.insert_historic(PersistentLayerDesc::new_test(
|
||||
Key::from_i128(0)..Key::from_i128(30),
|
||||
Lsn(400)..Lsn(500),
|
||||
true,
|
||||
));
|
||||
updates.insert_historic(PersistentLayerDesc::new_test(
|
||||
Key::from_i128(35)..Key::from_i128(100),
|
||||
Lsn(450)..Lsn(600),
|
||||
true,
|
||||
));
|
||||
}
|
||||
let image_consistent_lsn = layer_map.compute_image_consistent_lsn(disk_consistent_lsn);
|
||||
assert_eq!(Lsn(199), image_consistent_lsn);
|
||||
|
||||
// case 7: 3 images with more older L1 deltas (no impact on final results)
|
||||
{
|
||||
let mut updates = layer_map.batch_update();
|
||||
updates.insert_historic(PersistentLayerDesc::new_test(
|
||||
Key::from_i128(0)..Key::from_i128(40),
|
||||
Lsn(0)..Lsn(50),
|
||||
true,
|
||||
));
|
||||
|
||||
updates.insert_historic(PersistentLayerDesc::new_test(
|
||||
Key::from_i128(50)..Key::from_i128(100),
|
||||
Lsn(10)..Lsn(60),
|
||||
true,
|
||||
));
|
||||
}
|
||||
let image_consistent_lsn = layer_map.compute_image_consistent_lsn(disk_consistent_lsn);
|
||||
assert_eq!(Lsn(199), image_consistent_lsn);
|
||||
|
||||
// case 8: 3 images with one more L1 delta with overlapping LSN range
|
||||
{
|
||||
let mut updates = layer_map.batch_update();
|
||||
updates.insert_historic(PersistentLayerDesc::new_test(
|
||||
Key::from_i128(0)..Key::from_i128(50),
|
||||
Lsn(50)..Lsn(250),
|
||||
true,
|
||||
));
|
||||
}
|
||||
let image_consistent_lsn = layer_map.compute_image_consistent_lsn(disk_consistent_lsn);
|
||||
assert_eq!(Lsn(100), image_consistent_lsn);
|
||||
}
|
||||
|
||||
/* END_HADRON */
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -12,6 +12,7 @@ use anyhow::Context;
|
||||
use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf};
|
||||
use futures::StreamExt;
|
||||
use itertools::Itertools;
|
||||
use neon_failpoint as fail;
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::models::{DetachBehavior, LocationConfigMode};
|
||||
use pageserver_api::shard::{
|
||||
@@ -1678,8 +1679,6 @@ impl TenantManager {
|
||||
// Phase 6: Release the InProgress on the parent shard
|
||||
drop(parent_slot_guard);
|
||||
|
||||
utils::pausable_failpoint!("shard-split-post-finish-pause");
|
||||
|
||||
Ok(child_shards)
|
||||
}
|
||||
|
||||
|
||||
@@ -194,6 +194,7 @@ pub(crate) use download::{
|
||||
};
|
||||
use index::GcCompactionState;
|
||||
pub(crate) use index::LayerFileMetadata;
|
||||
use neon_failpoint as fail;
|
||||
use pageserver_api::models::{RelSizeMigration, TimelineArchivalState, TimelineVisibilityState};
|
||||
use pageserver_api::shard::{ShardIndex, TenantShardId};
|
||||
use regex::Regex;
|
||||
|
||||
@@ -11,6 +11,7 @@ use std::time::SystemTime;
|
||||
|
||||
use anyhow::{Context, anyhow};
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use neon_failpoint as fail;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use remote_storage::{
|
||||
DownloadError, DownloadKind, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath,
|
||||
|
||||
@@ -8,6 +8,7 @@ use anyhow::{Context, bail};
|
||||
use bytes::Bytes;
|
||||
use camino::Utf8Path;
|
||||
use fail::fail_point;
|
||||
use neon_failpoint as fail;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use remote_storage::{GenericRemoteStorage, RemotePath, TimeTravelError};
|
||||
use tokio::fs::{self, File};
|
||||
|
||||
@@ -75,7 +75,7 @@ where
|
||||
/// the same ValueReconstructState struct in the next 'get_value_reconstruct_data'
|
||||
/// call, to collect more records.
|
||||
///
|
||||
#[derive(Debug, Default, Clone)]
|
||||
#[derive(Debug, Default)]
|
||||
pub(crate) struct ValueReconstructState {
|
||||
pub(crate) records: Vec<(Lsn, NeonWalRecord)>,
|
||||
pub(crate) img: Option<(Lsn, Bytes)>,
|
||||
@@ -308,9 +308,6 @@ pub struct ValuesReconstructState {
|
||||
layers_visited: u32,
|
||||
delta_layers_visited: u32,
|
||||
|
||||
pub(crate) enable_debug: bool,
|
||||
pub(crate) debug_state: ValueReconstructState,
|
||||
|
||||
pub(crate) io_concurrency: IoConcurrency,
|
||||
num_active_ios: Arc<AtomicUsize>,
|
||||
|
||||
@@ -660,23 +657,6 @@ impl ValuesReconstructState {
|
||||
layers_visited: 0,
|
||||
delta_layers_visited: 0,
|
||||
io_concurrency,
|
||||
enable_debug: false,
|
||||
debug_state: ValueReconstructState::default(),
|
||||
num_active_ios: Arc::new(AtomicUsize::new(0)),
|
||||
read_path: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn new_with_debug(io_concurrency: IoConcurrency) -> Self {
|
||||
Self {
|
||||
keys: HashMap::new(),
|
||||
keys_done: KeySpaceRandomAccum::new(),
|
||||
keys_with_image_coverage: None,
|
||||
layers_visited: 0,
|
||||
delta_layers_visited: 0,
|
||||
io_concurrency,
|
||||
enable_debug: true,
|
||||
debug_state: ValueReconstructState::default(),
|
||||
num_active_ios: Arc::new(AtomicUsize::new(0)),
|
||||
read_path: None,
|
||||
}
|
||||
@@ -690,12 +670,6 @@ impl ValuesReconstructState {
|
||||
self.io_concurrency.spawn_io(fut).await;
|
||||
}
|
||||
|
||||
pub(crate) fn set_debug_state(&mut self, debug_state: &ValueReconstructState) {
|
||||
if self.enable_debug {
|
||||
self.debug_state = debug_state.clone();
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn on_layer_visited(&mut self, layer: &ReadableLayer) {
|
||||
self.layers_visited += 1;
|
||||
if let ReadableLayer::PersistentLayer(layer) = layer {
|
||||
|
||||
@@ -39,6 +39,7 @@ use layer_manager::{
|
||||
LayerManagerLockHolder, LayerManagerReadGuard, LayerManagerWriteGuard, LockedLayerManager,
|
||||
Shutdown,
|
||||
};
|
||||
use neon_failpoint as fail;
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
use pageserver_api::config::tenant_conf_defaults::DEFAULT_PITR_INTERVAL;
|
||||
@@ -351,6 +352,13 @@ pub struct Timeline {
|
||||
last_image_layer_creation_check_at: AtomicLsn,
|
||||
last_image_layer_creation_check_instant: std::sync::Mutex<Option<Instant>>,
|
||||
|
||||
// HADRON
|
||||
/// If a key range has writes with LSN > force_image_creation_lsn, then we should force image layer creation
|
||||
/// on this key range.
|
||||
force_image_creation_lsn: AtomicLsn,
|
||||
/// The last time instant when force_image_creation_lsn is computed.
|
||||
force_image_creation_lsn_computed_at: std::sync::Mutex<Option<Instant>>,
|
||||
|
||||
/// Current logical size of the "datadir", at the last LSN.
|
||||
current_logical_size: LogicalSize,
|
||||
|
||||
@@ -1253,57 +1261,6 @@ impl Timeline {
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) async fn debug_get(
|
||||
&self,
|
||||
key: Key,
|
||||
lsn: Lsn,
|
||||
ctx: &RequestContext,
|
||||
reconstruct_state: &mut ValuesReconstructState,
|
||||
) -> Result<Bytes, PageReconstructError> {
|
||||
if !lsn.is_valid() {
|
||||
return Err(PageReconstructError::Other(anyhow::anyhow!("Invalid LSN")));
|
||||
}
|
||||
|
||||
// This check is debug-only because of the cost of hashing, and because it's a double-check: we
|
||||
// already checked the key against the shard_identity when looking up the Timeline from
|
||||
// page_service.
|
||||
debug_assert!(!self.shard_identity.is_key_disposable(&key));
|
||||
|
||||
let query = VersionedKeySpaceQuery::uniform(KeySpace::single(key..key.next()), lsn);
|
||||
let vectored_res = self
|
||||
.debug_get_vectored_impl(query, reconstruct_state, ctx)
|
||||
.await;
|
||||
|
||||
let key_value = vectored_res?.pop_first();
|
||||
match key_value {
|
||||
Some((got_key, value)) => {
|
||||
if got_key != key {
|
||||
error!(
|
||||
"Expected {}, but singular vectored get returned {}",
|
||||
key, got_key
|
||||
);
|
||||
Err(PageReconstructError::Other(anyhow!(
|
||||
"Singular vectored get returned wrong key"
|
||||
)))
|
||||
} else {
|
||||
value
|
||||
}
|
||||
}
|
||||
None => Err(PageReconstructError::MissingKey(Box::new(
|
||||
MissingKeyError {
|
||||
keyspace: KeySpace::single(key..key.next()),
|
||||
shard: self.shard_identity.get_shard_number(&key),
|
||||
original_hwm_lsn: lsn,
|
||||
ancestor_lsn: None,
|
||||
backtrace: None,
|
||||
read_path: None,
|
||||
query: None,
|
||||
},
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) const LAYERS_VISITED_WARN_THRESHOLD: u32 = 100;
|
||||
|
||||
/// Look up multiple page versions at a given LSN
|
||||
@@ -1598,98 +1555,6 @@ impl Timeline {
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
// A copy of the get_vectored_impl method except that we store the image and wal records into `reconstruct_state`.
|
||||
// This is only used in the http getpage call for debugging purpose.
|
||||
pub(super) async fn debug_get_vectored_impl(
|
||||
&self,
|
||||
query: VersionedKeySpaceQuery,
|
||||
reconstruct_state: &mut ValuesReconstructState,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<BTreeMap<Key, Result<Bytes, PageReconstructError>>, GetVectoredError> {
|
||||
if query.is_empty() {
|
||||
return Ok(BTreeMap::default());
|
||||
}
|
||||
|
||||
let read_path = if self.conf.enable_read_path_debugging || ctx.read_path_debug() {
|
||||
Some(ReadPath::new(
|
||||
query.total_keyspace(),
|
||||
query.high_watermark_lsn()?,
|
||||
))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
reconstruct_state.read_path = read_path;
|
||||
|
||||
let traversal_res: Result<(), _> = self
|
||||
.get_vectored_reconstruct_data(query.clone(), reconstruct_state, ctx)
|
||||
.await;
|
||||
|
||||
if let Err(err) = traversal_res {
|
||||
// Wait for all the spawned IOs to complete.
|
||||
// See comments on `spawn_io` inside `storage_layer` for more details.
|
||||
let mut collect_futs = std::mem::take(&mut reconstruct_state.keys)
|
||||
.into_values()
|
||||
.map(|state| state.collect_pending_ios())
|
||||
.collect::<FuturesUnordered<_>>();
|
||||
while collect_futs.next().await.is_some() {}
|
||||
return Err(err);
|
||||
};
|
||||
|
||||
let reconstruct_state = Arc::new(Mutex::new(reconstruct_state));
|
||||
let futs = FuturesUnordered::new();
|
||||
|
||||
for (key, state) in std::mem::take(&mut reconstruct_state.lock().unwrap().keys) {
|
||||
let req_lsn_for_key = query.map_key_to_lsn(&key);
|
||||
futs.push({
|
||||
let walredo_self = self.myself.upgrade().expect("&self method holds the arc");
|
||||
let rc_clone = Arc::clone(&reconstruct_state);
|
||||
|
||||
async move {
|
||||
assert_eq!(state.situation, ValueReconstructSituation::Complete);
|
||||
|
||||
let converted = match state.collect_pending_ios().await {
|
||||
Ok(ok) => ok,
|
||||
Err(err) => {
|
||||
return (key, Err(err));
|
||||
}
|
||||
};
|
||||
DELTAS_PER_READ_GLOBAL.observe(converted.num_deltas() as f64);
|
||||
|
||||
// The walredo module expects the records to be descending in terms of Lsn.
|
||||
// And we submit the IOs in that order, so, there shuold be no need to sort here.
|
||||
debug_assert!(
|
||||
converted
|
||||
.records
|
||||
.is_sorted_by_key(|(lsn, _)| std::cmp::Reverse(*lsn)),
|
||||
"{converted:?}"
|
||||
);
|
||||
{
|
||||
let mut guard = rc_clone.lock().unwrap();
|
||||
guard.set_debug_state(&converted);
|
||||
}
|
||||
(
|
||||
key,
|
||||
walredo_self
|
||||
.reconstruct_value(
|
||||
key,
|
||||
req_lsn_for_key,
|
||||
converted,
|
||||
RedoAttemptType::ReadPage,
|
||||
)
|
||||
.await,
|
||||
)
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
let results = futs
|
||||
.collect::<BTreeMap<Key, Result<Bytes, PageReconstructError>>>()
|
||||
.await;
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Get last or prev record separately. Same as get_last_record_rlsn().last/prev.
|
||||
pub(crate) fn get_last_record_lsn(&self) -> Lsn {
|
||||
self.last_record_lsn.load().last
|
||||
@@ -2036,8 +1901,6 @@ impl Timeline {
|
||||
// an ephemeral layer open forever when idle. It also freezes layers if the global limit on
|
||||
// ephemeral layer bytes has been breached.
|
||||
pub(super) async fn maybe_freeze_ephemeral_layer(&self) {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
|
||||
let Ok(mut write_guard) = self.write_lock.try_lock() else {
|
||||
// If the write lock is held, there is an active wal receiver: rolling open layers
|
||||
// is their responsibility while they hold this lock.
|
||||
@@ -2992,7 +2855,7 @@ impl Timeline {
|
||||
}
|
||||
|
||||
// HADRON
|
||||
fn get_image_layer_force_creation_period(&self) -> Option<Duration> {
|
||||
fn get_image_creation_timeout(&self) -> Option<Duration> {
|
||||
let tenant_conf = self.tenant_conf.load();
|
||||
tenant_conf
|
||||
.tenant_conf
|
||||
@@ -3272,6 +3135,9 @@ impl Timeline {
|
||||
repartition_threshold: 0,
|
||||
last_image_layer_creation_check_at: AtomicLsn::new(0),
|
||||
last_image_layer_creation_check_instant: Mutex::new(None),
|
||||
// HADRON
|
||||
force_image_creation_lsn: AtomicLsn::new(0),
|
||||
force_image_creation_lsn_computed_at: std::sync::Mutex::new(None),
|
||||
last_received_wal: Mutex::new(None),
|
||||
rel_size_latest_cache: RwLock::new(HashMap::new()),
|
||||
rel_size_snapshot_cache: Mutex::new(LruCache::new(relsize_snapshot_cache_capacity)),
|
||||
@@ -5318,7 +5184,9 @@ impl Timeline {
|
||||
*self.applied_gc_cutoff_lsn.read(),
|
||||
);
|
||||
|
||||
fail_point!("checkpoint-before-saving-metadata", |x| bail!(
|
||||
neon_failpoint::fail_point_sync!("checkpoint-before-saving-metadata", |x: Option<
|
||||
String,
|
||||
>| bail!(
|
||||
"{}",
|
||||
x.unwrap()
|
||||
));
|
||||
@@ -5516,16 +5384,13 @@ impl Timeline {
|
||||
}
|
||||
|
||||
// HADRON
|
||||
// for child timelines, we consider all pages up to ancestor_LSN are redone successfully by the parent timeline
|
||||
min_image_lsn = min_image_lsn.max(self.get_ancestor_lsn());
|
||||
if min_image_lsn < force_image_creation_lsn.unwrap_or(Lsn(0)) && max_deltas > 0 {
|
||||
info!(
|
||||
"forcing image creation for partitioned range {}-{}. Min image LSN: {}, force image creation LSN: {}, num deltas: {}",
|
||||
"forcing image creation for partitioned range {}-{}. Min image LSN: {}, force image creation LSN: {}",
|
||||
partition.ranges[0].start,
|
||||
partition.ranges[0].end,
|
||||
min_image_lsn,
|
||||
force_image_creation_lsn.unwrap(),
|
||||
max_deltas
|
||||
force_image_creation_lsn.unwrap()
|
||||
);
|
||||
return true;
|
||||
}
|
||||
@@ -5749,11 +5614,10 @@ impl Timeline {
|
||||
/// Predicate function which indicates whether we should check if new image layers
|
||||
/// are required. Since checking if new image layers are required is expensive in
|
||||
/// terms of CPU, we only do it in the following cases:
|
||||
/// 1. If the timeline has ingested sufficient WAL to justify the cost or ...
|
||||
/// 1. If the timeline has ingested sufficient WAL to justify the cost
|
||||
/// 2. If enough time has passed since the last check:
|
||||
/// 1. For large tenants, we wish to perform the check more often since they
|
||||
/// suffer from the lack of image layers. Note that we assume sharded tenants
|
||||
/// to be large since non-zero shards do not track the logical size.
|
||||
/// suffer from the lack of image layers
|
||||
/// 2. For small tenants (that can mostly fit in RAM), we use a much longer interval
|
||||
fn should_check_if_image_layers_required(self: &Arc<Timeline>, lsn: Lsn) -> bool {
|
||||
let large_timeline_threshold = self.conf.image_layer_generation_large_timeline_threshold;
|
||||
@@ -5767,39 +5631,30 @@ impl Timeline {
|
||||
|
||||
let distance_based_decision = distance.0 >= min_distance;
|
||||
|
||||
let mut time_based_decision = false;
|
||||
let mut last_check_instant = self.last_image_layer_creation_check_instant.lock().unwrap();
|
||||
let check_required_after = (|| {
|
||||
if self.shard_identity.is_unsharded() {
|
||||
if let CurrentLogicalSize::Exact(logical_size) =
|
||||
self.current_logical_size.current_size()
|
||||
{
|
||||
if Some(Into::<u64>::into(&logical_size)) < large_timeline_threshold {
|
||||
return Duration::from_secs(3600 * 48);
|
||||
}
|
||||
if let CurrentLogicalSize::Exact(logical_size) = self.current_logical_size.current_size() {
|
||||
let check_required_after =
|
||||
if Some(Into::<u64>::into(&logical_size)) >= large_timeline_threshold {
|
||||
self.get_checkpoint_timeout()
|
||||
} else {
|
||||
Duration::from_secs(3600 * 48)
|
||||
};
|
||||
|
||||
time_based_decision = match *last_check_instant {
|
||||
Some(last_check) => {
|
||||
let elapsed = last_check.elapsed();
|
||||
elapsed >= check_required_after
|
||||
}
|
||||
}
|
||||
|
||||
self.get_checkpoint_timeout()
|
||||
})();
|
||||
|
||||
let time_based_decision = match *last_check_instant {
|
||||
Some(last_check) => {
|
||||
let elapsed = last_check.elapsed();
|
||||
elapsed >= check_required_after
|
||||
}
|
||||
None => true,
|
||||
};
|
||||
None => true,
|
||||
};
|
||||
}
|
||||
|
||||
// Do the expensive delta layer counting only if this timeline has ingested sufficient
|
||||
// WAL since the last check or a checkpoint timeout interval has elapsed since the last
|
||||
// check.
|
||||
let decision = distance_based_decision || time_based_decision;
|
||||
tracing::info!(
|
||||
"Decided to check image layers: {}. Distance-based decision: {}, time-based decision: {}",
|
||||
decision,
|
||||
distance_based_decision,
|
||||
time_based_decision
|
||||
);
|
||||
|
||||
if decision {
|
||||
self.last_image_layer_creation_check_at.store(lsn);
|
||||
*last_check_instant = Some(Instant::now());
|
||||
@@ -7301,19 +7156,6 @@ impl Timeline {
|
||||
.unwrap()
|
||||
.clone()
|
||||
}
|
||||
|
||||
/* BEGIN_HADRON */
|
||||
pub(crate) async fn compute_image_consistent_lsn(&self) -> anyhow::Result<Lsn> {
|
||||
let guard = self
|
||||
.layers
|
||||
.read(LayerManagerLockHolder::ComputeImageConsistentLsn)
|
||||
.await;
|
||||
let layer_map = guard.layer_map()?;
|
||||
let disk_consistent_lsn = self.get_disk_consistent_lsn();
|
||||
|
||||
Ok(layer_map.compute_image_consistent_lsn(disk_consistent_lsn))
|
||||
}
|
||||
/* END_HADRON */
|
||||
}
|
||||
|
||||
impl Timeline {
|
||||
|
||||
@@ -8,7 +8,7 @@ use std::cmp::min;
|
||||
use std::collections::{BinaryHeap, HashMap, HashSet, VecDeque};
|
||||
use std::ops::{Deref, Range};
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
use std::time::{Duration, Instant, SystemTime};
|
||||
|
||||
use super::layer_manager::LayerManagerLockHolder;
|
||||
use super::{
|
||||
@@ -26,6 +26,7 @@ use enumset::EnumSet;
|
||||
use fail::fail_point;
|
||||
use futures::FutureExt;
|
||||
use itertools::Itertools;
|
||||
use neon_failpoint as fail;
|
||||
use once_cell::sync::Lazy;
|
||||
use pageserver_api::config::tenant_conf_defaults::DEFAULT_CHECKPOINT_DISTANCE;
|
||||
use pageserver_api::key::{KEY_SIZE, Key};
|
||||
@@ -34,6 +35,7 @@ use pageserver_api::models::{CompactInfoResponse, CompactKeyRange};
|
||||
use pageserver_api::shard::{ShardCount, ShardIdentity, TenantShardId};
|
||||
use pageserver_compaction::helpers::{fully_contains, overlaps_with};
|
||||
use pageserver_compaction::interface::*;
|
||||
use postgres_ffi::to_pg_timestamp;
|
||||
use serde::Serialize;
|
||||
use tokio::sync::{OwnedSemaphorePermit, Semaphore};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
@@ -46,6 +48,7 @@ use wal_decoder::models::value::Value;
|
||||
|
||||
use crate::context::{AccessStatsBehavior, RequestContext, RequestContextBuilder};
|
||||
use crate::page_cache;
|
||||
use crate::pgdatadir_mapping::LsnForTimestamp;
|
||||
use crate::statvfs::Statvfs;
|
||||
use crate::tenant::checks::check_valid_layermap;
|
||||
use crate::tenant::gc_block::GcBlock;
|
||||
@@ -1269,7 +1272,10 @@ impl Timeline {
|
||||
// Define partitioning schema if needed
|
||||
|
||||
// HADRON
|
||||
let force_image_creation_lsn = self.get_force_image_creation_lsn();
|
||||
let force_image_creation_lsn = self
|
||||
.get_or_compute_force_image_creation_lsn(cancel, ctx)
|
||||
.await
|
||||
.map_err(CompactionError::Other)?;
|
||||
|
||||
// 1. L0 Compact
|
||||
let l0_outcome = {
|
||||
@@ -1479,37 +1485,59 @@ impl Timeline {
|
||||
}
|
||||
|
||||
/* BEGIN_HADRON */
|
||||
// Get the force image creation LSN based on gc_cutoff_lsn.
|
||||
// Note that this is an estimation and the workload rate may suddenly change. When that happens,
|
||||
// the force image creation may be too early or too late, but eventually it should be able to catch up.
|
||||
pub(crate) fn get_force_image_creation_lsn(self: &Arc<Self>) -> Option<Lsn> {
|
||||
let image_creation_period = self.get_image_layer_force_creation_period()?;
|
||||
let current_lsn = self.get_last_record_lsn();
|
||||
let pitr_lsn = self.gc_info.read().unwrap().cutoffs.time?;
|
||||
let pitr_interval = self.get_pitr_interval();
|
||||
if pitr_lsn == Lsn::INVALID || pitr_interval.is_zero() {
|
||||
tracing::warn!(
|
||||
"pitr LSN/interval not found, skipping force image creation LSN calculation"
|
||||
);
|
||||
return None;
|
||||
// Get the force image creation LSN. Compute it if the last computed LSN is too old.
|
||||
async fn get_or_compute_force_image_creation_lsn(
|
||||
self: &Arc<Self>,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Option<Lsn>> {
|
||||
const FORCE_IMAGE_CREATION_LSN_COMPUTE_INTERVAL: Duration = Duration::from_secs(10 * 60); // 10 minutes
|
||||
let image_layer_force_creation_period = self.get_image_creation_timeout();
|
||||
if image_layer_force_creation_period.is_none() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let delta_lsn = current_lsn.checked_sub(pitr_lsn).unwrap().0
|
||||
* image_creation_period.as_secs()
|
||||
/ pitr_interval.as_secs();
|
||||
let force_image_creation_lsn = current_lsn.checked_sub(delta_lsn).unwrap_or(Lsn(0));
|
||||
|
||||
tracing::info!(
|
||||
"Tenant shard {} computed force_image_creation_lsn: {}. Current lsn: {}, image_layer_force_creation_period: {:?}, GC cutoff: {}, PITR interval: {:?}",
|
||||
self.tenant_shard_id,
|
||||
force_image_creation_lsn,
|
||||
current_lsn,
|
||||
image_creation_period,
|
||||
pitr_lsn,
|
||||
pitr_interval
|
||||
);
|
||||
|
||||
Some(force_image_creation_lsn)
|
||||
let image_layer_force_creation_period = image_layer_force_creation_period.unwrap();
|
||||
let force_image_creation_lsn_computed_at =
|
||||
*self.force_image_creation_lsn_computed_at.lock().unwrap();
|
||||
if force_image_creation_lsn_computed_at.is_none()
|
||||
|| force_image_creation_lsn_computed_at.unwrap().elapsed()
|
||||
> FORCE_IMAGE_CREATION_LSN_COMPUTE_INTERVAL
|
||||
{
|
||||
let now: SystemTime = SystemTime::now();
|
||||
let timestamp = now
|
||||
.checked_sub(image_layer_force_creation_period)
|
||||
.ok_or_else(|| {
|
||||
anyhow::anyhow!(
|
||||
"image creation timeout is too large: {image_layer_force_creation_period:?}"
|
||||
)
|
||||
})?;
|
||||
let timestamp = to_pg_timestamp(timestamp);
|
||||
let force_image_creation_lsn = match self
|
||||
.find_lsn_for_timestamp(timestamp, cancel, ctx)
|
||||
.await?
|
||||
{
|
||||
LsnForTimestamp::Present(lsn) | LsnForTimestamp::Future(lsn) => lsn,
|
||||
_ => {
|
||||
let gc_lsn = *self.get_applied_gc_cutoff_lsn();
|
||||
tracing::info!(
|
||||
"no LSN found for timestamp {timestamp:?}, using latest GC cutoff LSN {}",
|
||||
gc_lsn
|
||||
);
|
||||
gc_lsn
|
||||
}
|
||||
};
|
||||
self.force_image_creation_lsn
|
||||
.store(force_image_creation_lsn);
|
||||
*self.force_image_creation_lsn_computed_at.lock().unwrap() = Some(Instant::now());
|
||||
tracing::info!(
|
||||
"computed force image creation LSN: {}",
|
||||
force_image_creation_lsn
|
||||
);
|
||||
Ok(Some(force_image_creation_lsn))
|
||||
} else {
|
||||
Ok(Some(self.force_image_creation_lsn.load()))
|
||||
}
|
||||
}
|
||||
/* END_HADRON */
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ use std::ops::{Deref, DerefMut};
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Context;
|
||||
use neon_failpoint as fail;
|
||||
use pageserver_api::models::TimelineState;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use remote_storage::DownloadError;
|
||||
|
||||
@@ -4,6 +4,7 @@ use std::sync::Arc;
|
||||
use anyhow::Context;
|
||||
use bytes::Bytes;
|
||||
use http_utils::error::ApiError;
|
||||
use neon_failpoint as fail;
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::keyspace::KeySpace;
|
||||
use pageserver_api::models::DetachBehavior;
|
||||
@@ -1113,7 +1114,7 @@ pub(super) async fn detach_and_reparent(
|
||||
// others will fail as if those timelines had been stopped for whatever reason.
|
||||
#[cfg(feature = "testing")]
|
||||
let failpoint_sem = || -> Option<Arc<Semaphore>> {
|
||||
fail::fail_point!("timeline-detach-ancestor::allow_one_reparented", |_| Some(
|
||||
fail::fail_point_sync!("timeline-detach-ancestor::allow_one_reparented", |_| Some(
|
||||
Arc::new(Semaphore::new(1))
|
||||
));
|
||||
None
|
||||
|
||||
@@ -359,14 +359,14 @@ impl<T: Types> Cache<T> {
|
||||
Err(e) => {
|
||||
// Retry on tenant manager error to handle tenant split more gracefully
|
||||
if attempt < GET_MAX_RETRIES {
|
||||
tracing::warn!(
|
||||
"Fail to resolve tenant shard in attempt {}: {:?}. Retrying...",
|
||||
attempt,
|
||||
e
|
||||
);
|
||||
tokio::time::sleep(RETRY_BACKOFF).await;
|
||||
continue;
|
||||
} else {
|
||||
tracing::warn!(
|
||||
"Failed to resolve tenant shard after {} attempts: {:?}",
|
||||
GET_MAX_RETRIES,
|
||||
e
|
||||
);
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -47,7 +47,6 @@ pub(crate) enum LayerManagerLockHolder {
|
||||
ImportPgData,
|
||||
DetachAncestor,
|
||||
Eviction,
|
||||
ComputeImageConsistentLsn,
|
||||
#[cfg(test)]
|
||||
Testing,
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ use std::sync::Arc;
|
||||
|
||||
use anyhow::Context;
|
||||
use camino::Utf8PathBuf;
|
||||
use neon_failpoint as fail;
|
||||
use tracing::{error, info, info_span};
|
||||
use utils::fs_ext;
|
||||
use utils::id::TimelineId;
|
||||
|
||||
@@ -11,6 +11,7 @@ use bytes::BytesMut;
|
||||
use chrono::{NaiveDateTime, Utc};
|
||||
use fail::fail_point;
|
||||
use futures::StreamExt;
|
||||
use neon_failpoint as fail;
|
||||
use postgres_backend::is_expected_io_error;
|
||||
use postgres_connection::PgConnectionConfig;
|
||||
use postgres_ffi::WAL_SEGMENT_SIZE;
|
||||
|
||||
@@ -147,16 +147,6 @@ pub enum RedoAttemptType {
|
||||
GcCompaction,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for RedoAttemptType {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
RedoAttemptType::ReadPage => write!(f, "read page"),
|
||||
RedoAttemptType::LegacyCompaction => write!(f, "legacy compaction"),
|
||||
RedoAttemptType::GcCompaction => write!(f, "gc compaction"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Public interface of WAL redo manager
|
||||
///
|
||||
@@ -209,7 +199,6 @@ impl PostgresRedoManager {
|
||||
self.conf.wal_redo_timeout,
|
||||
pg_version,
|
||||
max_retry_attempts,
|
||||
redo_attempt_type,
|
||||
)
|
||||
.await
|
||||
};
|
||||
@@ -232,7 +221,6 @@ impl PostgresRedoManager {
|
||||
self.conf.wal_redo_timeout,
|
||||
pg_version,
|
||||
max_retry_attempts,
|
||||
redo_attempt_type,
|
||||
)
|
||||
.await
|
||||
}
|
||||
@@ -457,7 +445,6 @@ impl PostgresRedoManager {
|
||||
wal_redo_timeout: Duration,
|
||||
pg_version: PgMajorVersion,
|
||||
max_retry_attempts: u32,
|
||||
redo_attempt_type: RedoAttemptType,
|
||||
) -> Result<Bytes, Error> {
|
||||
*(self.last_redo_at.lock().unwrap()) = Some(Instant::now());
|
||||
|
||||
@@ -498,28 +485,17 @@ impl PostgresRedoManager {
|
||||
);
|
||||
|
||||
if let Err(e) = result.as_ref() {
|
||||
macro_rules! message {
|
||||
($level:tt) => {
|
||||
$level!(
|
||||
"error applying {} WAL records {}..{} ({} bytes) to key {} during {}, from base image with LSN {} to reconstruct page image at LSN {} n_attempts={}: {:?}",
|
||||
records.len(),
|
||||
records.first().map(|p| p.0).unwrap_or(Lsn(0)),
|
||||
records.last().map(|p| p.0).unwrap_or(Lsn(0)),
|
||||
nbytes,
|
||||
key,
|
||||
redo_attempt_type,
|
||||
base_img_lsn,
|
||||
lsn,
|
||||
n_attempts,
|
||||
e,
|
||||
)
|
||||
}
|
||||
}
|
||||
match redo_attempt_type {
|
||||
RedoAttemptType::ReadPage => message!(error),
|
||||
RedoAttemptType::LegacyCompaction => message!(error),
|
||||
RedoAttemptType::GcCompaction => message!(warn),
|
||||
}
|
||||
error!(
|
||||
"error applying {} WAL records {}..{} ({} bytes) to key {key}, from base image with LSN {} to reconstruct page image at LSN {} n_attempts={}: {:?}",
|
||||
records.len(),
|
||||
records.first().map(|p| p.0).unwrap_or(Lsn(0)),
|
||||
records.last().map(|p| p.0).unwrap_or(Lsn(0)),
|
||||
nbytes,
|
||||
base_img_lsn,
|
||||
lsn,
|
||||
n_attempts,
|
||||
e,
|
||||
);
|
||||
}
|
||||
|
||||
result.map_err(Error::Other)
|
||||
|
||||
@@ -38,8 +38,6 @@ DATA = \
|
||||
neon--1.3--1.4.sql \
|
||||
neon--1.4--1.5.sql \
|
||||
neon--1.5--1.6.sql \
|
||||
neon--1.6--1.7.sql \
|
||||
neon--1.7--1.6.sql \
|
||||
neon--1.6--1.5.sql \
|
||||
neon--1.5--1.4.sql \
|
||||
neon--1.4--1.3.sql \
|
||||
|
||||
@@ -54,7 +54,6 @@
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/twophase.h"
|
||||
#include "access/xlog.h"
|
||||
#include "access/xlogdefs.h"
|
||||
#include "access/xlog_internal.h"
|
||||
@@ -65,7 +64,6 @@
|
||||
#include "miscadmin.h"
|
||||
#include "port/pg_iovec.h"
|
||||
#include "postmaster/interrupt.h"
|
||||
#include "postmaster/postmaster.h"
|
||||
#include "replication/walsender.h"
|
||||
#include "storage/ipc.h"
|
||||
#include "utils/timeout.h"
|
||||
@@ -77,18 +75,11 @@
|
||||
#include "neon_perf_counters.h"
|
||||
#include "pagestore_client.h"
|
||||
|
||||
#if PG_MAJORVERSION_NUM >= 17
|
||||
#include "storage/procnumber.h"
|
||||
#else
|
||||
#define MyProcNumber MyProc->pgprocno
|
||||
#endif
|
||||
|
||||
|
||||
#if PG_MAJORVERSION_NUM >= 15
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
#include "access/xlogrecovery.h"
|
||||
#endif
|
||||
|
||||
#if PG_MAJORVERSION_NUM < 16
|
||||
#if PG_VERSION_NUM < 160000
|
||||
typedef PGAlignedBlock PGIOAlignedBlock;
|
||||
#endif
|
||||
|
||||
@@ -303,15 +294,6 @@ static PrefetchState *MyPState;
|
||||
|
||||
static process_interrupts_callback_t prev_interrupt_cb;
|
||||
|
||||
/*
|
||||
* Array in shared memory each cell of which contains minimal in-flight request LSN sent to PS by the backend which procno is
|
||||
* used as index in this array. This array is initially filled with InfiniteXlogRecPtr (UINT64_MAX) so if backend
|
||||
* didn't send any request to PS, then this value doesn't effect global min.
|
||||
*
|
||||
* We support only 64-bit platforms and so assume that access to array elements is atomic and no any synchronization is needed.
|
||||
*/
|
||||
static XLogRecPtr* minPrefetchLsn;
|
||||
|
||||
static bool compact_prefetch_buffers(void);
|
||||
static void consume_prefetch_responses(void);
|
||||
static uint64 prefetch_register_bufferv(BufferTag tag, neon_request_lsns *frlsns,
|
||||
@@ -334,41 +316,6 @@ pg_init_communicator(void)
|
||||
ProcessInterruptsCallback = communicator_processinterrupts;
|
||||
}
|
||||
|
||||
static Size
|
||||
CommunicatorShmemSize(void)
|
||||
{
|
||||
#if PG_MAJORVERSION_NUM >= 15
|
||||
Assert(MaxBackends != 0);
|
||||
return (MaxBackends + NUM_AUXILIARY_PROCS + max_prepared_xacts) * sizeof(XLogRecPtr);
|
||||
#else
|
||||
return (MAX_BACKENDS + NUM_AUXILIARY_PROCS + max_prepared_xacts) * sizeof(XLogRecPtr);
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
CommunicatorShmemRequest(void)
|
||||
{
|
||||
RequestAddinShmemSpace(CommunicatorShmemSize());
|
||||
}
|
||||
|
||||
void
|
||||
CommunicatorShmemInit(void)
|
||||
{
|
||||
bool found;
|
||||
minPrefetchLsn = (XLogRecPtr*)ShmemInitStruct("Communicator shared state",
|
||||
CommunicatorShmemSize(),
|
||||
&found);
|
||||
if (!found)
|
||||
{
|
||||
/*
|
||||
* Fill with InfiniteXLogRecPtr (UINT64_MAX).
|
||||
* If backend didn't send any requests to PS, then InfiniteXLogRecPtr doesn't affect global minimal value.
|
||||
*/
|
||||
memset(minPrefetchLsn, 0xFF, CommunicatorShmemSize());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static bool
|
||||
compact_prefetch_buffers(void)
|
||||
{
|
||||
@@ -474,7 +421,7 @@ check_getpage_response(PrefetchRequest* slot, NeonResponse* resp)
|
||||
{
|
||||
if (resp->tag != T_NeonGetPageResponse && resp->tag != T_NeonErrorResponse)
|
||||
{
|
||||
neon_shard_log(slot->shard_no, PANIC, "Unexpected prefetch response %d, ring_receive=" UINT64_FORMAT ", ring_flush=" UINT64_FORMAT ", ring_unused=" UINT64_FORMAT "",
|
||||
neon_shard_log(slot->shard_no, PANIC, "Unexpected prefetch response %d, ring_receive=%ld, ring_flush=%ld, ring_unused=%ld",
|
||||
resp->tag, MyPState->ring_receive, MyPState->ring_flush, MyPState->ring_unused);
|
||||
}
|
||||
if (neon_protocol_version >= 3)
|
||||
@@ -491,7 +438,7 @@ check_getpage_response(PrefetchRequest* slot, NeonResponse* resp)
|
||||
getpage_resp->req.blkno != slot->buftag.blockNum)
|
||||
{
|
||||
NEON_PANIC_CONNECTION_STATE(slot->shard_no, PANIC,
|
||||
"Receive unexpected getpage response {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u, block=%u} to get page request {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u, block=%u}",
|
||||
"Receive unexpected getpage response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u, block=%u} to get page request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u, block=%u}",
|
||||
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), RelFileInfoFmt(getpage_resp->req.rinfo), getpage_resp->req.forknum, getpage_resp->req.blkno,
|
||||
slot->reqid, LSN_FORMAT_ARGS(slot->request_lsns.request_lsn), LSN_FORMAT_ARGS(slot->request_lsns.not_modified_since), RelFileInfoFmt(rinfo), slot->buftag.forkNum, slot->buftag.blockNum);
|
||||
}
|
||||
@@ -500,27 +447,13 @@ check_getpage_response(PrefetchRequest* slot, NeonResponse* resp)
|
||||
resp->lsn != slot->request_lsns.request_lsn ||
|
||||
resp->not_modified_since != slot->request_lsns.not_modified_since)
|
||||
{
|
||||
elog(WARNING, NEON_TAG "Error message {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X} doesn't match exists request {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X}",
|
||||
elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match exists request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}",
|
||||
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),
|
||||
slot->reqid, LSN_FORMAT_ARGS(slot->request_lsns.request_lsn), LSN_FORMAT_ARGS(slot->request_lsns.not_modified_since));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Update min in-flight prefetch LSN for this backend.
|
||||
*/
|
||||
static void
|
||||
update_min_prefetch_lsn(uint64 ring_index)
|
||||
{
|
||||
if (ring_index + 1 < MyPState->ring_unused)
|
||||
{
|
||||
PrefetchRequest* next_slot = GetPrfSlot(ring_index + 1);
|
||||
Assert(minPrefetchLsn[MyProcNumber] <= next_slot->request_lsns.request_lsn);
|
||||
minPrefetchLsn[MyProcNumber] = next_slot->request_lsns.request_lsn;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If there might be responses still in the TCP buffer, then we should try to
|
||||
* use those, to reduce any TCP backpressure on the OS/PS side.
|
||||
@@ -545,9 +478,8 @@ communicator_prefetch_pump_state(void)
|
||||
NeonResponse *response;
|
||||
PrefetchRequest *slot;
|
||||
MemoryContext old;
|
||||
uint64 my_ring_index = MyPState->ring_receive;
|
||||
|
||||
slot = GetPrfSlot(my_ring_index);
|
||||
slot = GetPrfSlot(MyPState->ring_receive);
|
||||
|
||||
old = MemoryContextSwitchTo(MyPState->errctx);
|
||||
response = page_server->try_receive(slot->shard_no);
|
||||
@@ -556,19 +488,17 @@ communicator_prefetch_pump_state(void)
|
||||
if (response == NULL)
|
||||
break;
|
||||
|
||||
update_min_prefetch_lsn(my_ring_index);
|
||||
|
||||
check_getpage_response(slot, response);
|
||||
|
||||
/* The slot should still be valid */
|
||||
if (slot->status != PRFS_REQUESTED ||
|
||||
slot->response != NULL ||
|
||||
slot->my_ring_index != my_ring_index)
|
||||
slot->my_ring_index != MyPState->ring_receive)
|
||||
{
|
||||
neon_shard_log(slot->shard_no, PANIC,
|
||||
"Incorrect prefetch slot state after receive: status=%d response=%p my=" UINT64_FORMAT " receive=" UINT64_FORMAT "",
|
||||
"Incorrect prefetch slot state after receive: status=%d response=%p my=%lu receive=%lu",
|
||||
slot->status, slot->response,
|
||||
slot->my_ring_index, my_ring_index);
|
||||
(long) slot->my_ring_index, (long) MyPState->ring_receive);
|
||||
}
|
||||
/* update prefetch state */
|
||||
MyPState->n_responses_buffered += 1;
|
||||
@@ -735,9 +665,6 @@ consume_prefetch_responses(void)
|
||||
{
|
||||
if (MyPState->ring_receive < MyPState->ring_unused)
|
||||
prefetch_wait_for(MyPState->ring_unused - 1);
|
||||
|
||||
minPrefetchLsn[MyProcNumber] = InfiniteXLogRecPtr; /* No more in-flight prefetch requests from this backend */
|
||||
|
||||
/*
|
||||
* We know for sure we're not working on any prefetch pages after
|
||||
* this.
|
||||
@@ -862,9 +789,9 @@ prefetch_read(PrefetchRequest *slot)
|
||||
slot->my_ring_index != MyPState->ring_receive)
|
||||
{
|
||||
neon_shard_log(slot->shard_no, PANIC,
|
||||
"Incorrect prefetch read: status=%d response=%p my=" UINT64_FORMAT " receive=" UINT64_FORMAT "",
|
||||
"Incorrect prefetch read: status=%d response=%p my=%lu receive=%lu",
|
||||
slot->status, slot->response,
|
||||
slot->my_ring_index, MyPState->ring_receive);
|
||||
(long)slot->my_ring_index, (long)MyPState->ring_receive);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -879,9 +806,6 @@ prefetch_read(PrefetchRequest *slot)
|
||||
old = MemoryContextSwitchTo(MyPState->errctx);
|
||||
response = (NeonResponse *) page_server->receive(shard_no);
|
||||
MemoryContextSwitchTo(old);
|
||||
|
||||
update_min_prefetch_lsn(my_ring_index);
|
||||
|
||||
if (response)
|
||||
{
|
||||
check_getpage_response(slot, response);
|
||||
@@ -892,9 +816,9 @@ prefetch_read(PrefetchRequest *slot)
|
||||
slot->my_ring_index != MyPState->ring_receive)
|
||||
{
|
||||
neon_shard_log(shard_no, PANIC,
|
||||
"Incorrect prefetch slot state after receive: status=%d response=%p my=" UINT64_FORMAT " receive=" UINT64_FORMAT "",
|
||||
"Incorrect prefetch slot state after receive: status=%d response=%p my=%lu receive=%lu",
|
||||
slot->status, slot->response,
|
||||
slot->my_ring_index, MyPState->ring_receive);
|
||||
(long) slot->my_ring_index, (long) MyPState->ring_receive);
|
||||
}
|
||||
|
||||
/* update prefetch state */
|
||||
@@ -928,8 +852,8 @@ prefetch_read(PrefetchRequest *slot)
|
||||
* and the prefetch queue was flushed during the receive call
|
||||
*/
|
||||
neon_shard_log(shard_no, LOG,
|
||||
"No response from reading prefetch entry " UINT64_FORMAT ": %u/%u/%u.%u block %u. This can be caused by a concurrent disconnect",
|
||||
my_ring_index,
|
||||
"No response from reading prefetch entry %lu: %u/%u/%u.%u block %u. This can be caused by a concurrent disconnect",
|
||||
(long) my_ring_index,
|
||||
RelFileInfoFmt(BufTagGetNRelFileInfo(buftag)),
|
||||
buftag.forkNum, buftag.blockNum);
|
||||
return false;
|
||||
@@ -1000,8 +924,6 @@ prefetch_on_ps_disconnect(void)
|
||||
MyNeonCounters->getpage_prefetch_discards_total += 1;
|
||||
}
|
||||
|
||||
minPrefetchLsn[MyProcNumber] = InfiniteXLogRecPtr; /* No more in-flight prefetch requests from this backend */
|
||||
|
||||
/*
|
||||
* We can have gone into retry due to network error, so update stats with
|
||||
* the latest available
|
||||
@@ -1103,8 +1025,6 @@ prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns
|
||||
Assert(slot->response == NULL);
|
||||
Assert(slot->my_ring_index == MyPState->ring_unused);
|
||||
|
||||
minPrefetchLsn[MyProcNumber] = Min(request.hdr.lsn, minPrefetchLsn[MyProcNumber]);
|
||||
|
||||
while (!page_server->send(slot->shard_no, (NeonRequest *) &request))
|
||||
{
|
||||
Assert(mySlotNo == MyPState->ring_unused);
|
||||
@@ -1125,23 +1045,6 @@ prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns
|
||||
Assert(!found);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that returned page LSN is consistent with request lsns
|
||||
*/
|
||||
static void
|
||||
check_page_lsn(NeonGetPageResponse* resp)
|
||||
{
|
||||
if (PageGetLSN(resp->page) > resp->req.hdr.not_modified_since)
|
||||
neon_log(PANIC, "Invalid getpage response version: %X/%08X is higher than last modified LSN %X/%08X",
|
||||
LSN_FORMAT_ARGS(PageGetLSN(resp->page)),
|
||||
LSN_FORMAT_ARGS(resp->req.hdr.not_modified_since));
|
||||
|
||||
if (PageGetLSN(resp->page) > resp->req.hdr.lsn)
|
||||
neon_log(PANIC, "Invalid getpage response version: %X/%08X is higher than request LSN %X/%08X",
|
||||
LSN_FORMAT_ARGS(PageGetLSN(resp->page)),
|
||||
LSN_FORMAT_ARGS(resp->req.hdr.lsn));
|
||||
}
|
||||
|
||||
/*
|
||||
* Lookup of already received prefetch requests. Only already received responses matching required LSNs are accepted.
|
||||
* Present pages are marked in "mask" bitmap and total number of such pages is returned.
|
||||
@@ -1165,7 +1068,7 @@ communicator_prefetch_lookupv(NRelFileInfo rinfo, ForkNumber forknum, BlockNumbe
|
||||
for (int i = 0; i < nblocks; i++)
|
||||
{
|
||||
PrfHashEntry *entry;
|
||||
NeonGetPageResponse* resp;
|
||||
|
||||
hashkey.buftag.blockNum = blocknum + i;
|
||||
entry = prfh_lookup(MyPState->prf_hash, &hashkey);
|
||||
|
||||
@@ -1198,9 +1101,8 @@ communicator_prefetch_lookupv(NRelFileInfo rinfo, ForkNumber forknum, BlockNumbe
|
||||
continue;
|
||||
}
|
||||
Assert(slot->response->tag == T_NeonGetPageResponse); /* checked by check_getpage_response when response was assigned to the slot */
|
||||
resp = (NeonGetPageResponse*)slot->response;
|
||||
check_page_lsn(resp);
|
||||
memcpy(buffers[i], resp->page, BLCKSZ);
|
||||
memcpy(buffers[i], ((NeonGetPageResponse*)slot->response)->page, BLCKSZ);
|
||||
|
||||
|
||||
/*
|
||||
* With lfc_store_prefetch_result=true prefetch result is stored in LFC in prefetch_pump_state when response is received
|
||||
@@ -1551,7 +1453,6 @@ page_server_request(void const *req)
|
||||
PG_TRY();
|
||||
{
|
||||
before_shmem_exit(prefetch_on_exit, Int32GetDatum(shard_no));
|
||||
minPrefetchLsn[MyProcNumber] = ((NeonRequest *)req)->lsn;
|
||||
do
|
||||
{
|
||||
while (!page_server->send(shard_no, (NeonRequest *) req)
|
||||
@@ -1563,12 +1464,10 @@ page_server_request(void const *req)
|
||||
resp = page_server->receive(shard_no);
|
||||
MyNeonCounters->pageserver_open_requests--;
|
||||
} while (resp == NULL);
|
||||
minPrefetchLsn[MyProcNumber] = InfiniteXLogRecPtr;
|
||||
cancel_before_shmem_exit(prefetch_on_exit, Int32GetDatum(shard_no));
|
||||
}
|
||||
PG_CATCH();
|
||||
{
|
||||
minPrefetchLsn[MyProcNumber] = InfiniteXLogRecPtr;
|
||||
cancel_before_shmem_exit(prefetch_on_exit, Int32GetDatum(shard_no));
|
||||
/* Nothing should cancel disconnect: we should not leave connection in opaque state */
|
||||
HOLD_INTERRUPTS();
|
||||
@@ -1945,7 +1844,7 @@ nm_to_string(NeonMessage *msg)
|
||||
NeonDbSizeResponse *msg_resp = (NeonDbSizeResponse *) msg;
|
||||
|
||||
appendStringInfoString(&s, "{\"type\": \"NeonDbSizeResponse\"");
|
||||
appendStringInfo(&s, ", \"db_size\": " INT64_FORMAT "}",
|
||||
appendStringInfo(&s, ", \"db_size\": %ld}",
|
||||
msg_resp->db_size);
|
||||
appendStringInfoChar(&s, '}');
|
||||
|
||||
@@ -1989,7 +1888,7 @@ communicator_init(void)
|
||||
* the check here. That's OK, we don't expect the logic to change in old
|
||||
* releases.
|
||||
*/
|
||||
#if PG_MAJORVERSION_NUM >= 15
|
||||
#if PG_VERSION_NUM>=150000
|
||||
if (MyNeonCounters >= &neon_per_backend_counters_shared[NUM_NEON_PERF_COUNTER_SLOTS])
|
||||
elog(ERROR, "MyNeonCounters points past end of array");
|
||||
#endif
|
||||
@@ -2068,7 +1967,7 @@ neon_prefetch_response_usable(neon_request_lsns *request_lsns,
|
||||
* Each request to the pageserver has three LSN values associated with it:
|
||||
* `not_modified_since`, `request_lsn`, and 'effective_request_lsn'.
|
||||
* `not_modified_since` and `request_lsn` are sent to the pageserver, but
|
||||
* in the primary node, we always use InfiniteXLogRecPtr as the `request_lsn`, so
|
||||
* in the primary node, we always use UINT64_MAX as the `request_lsn`, so
|
||||
* we remember `effective_request_lsn` separately. In a primary,
|
||||
* `effective_request_lsn` is the same as `not_modified_since`.
|
||||
* See comments in neon_get_request_lsns why we can not use last flush WAL position here.
|
||||
@@ -2146,7 +2045,7 @@ communicator_exists(NRelFileInfo rinfo, ForkNumber forkNum, neon_request_lsns *r
|
||||
exists_resp->req.forknum != request.forknum)
|
||||
{
|
||||
NEON_PANIC_CONNECTION_STATE(0, PANIC,
|
||||
"Unexpect response {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u} to exits request {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u}",
|
||||
"Unexpect response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u} to exits request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u}",
|
||||
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), RelFileInfoFmt(exists_resp->req.rinfo), exists_resp->req.forknum,
|
||||
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), RelFileInfoFmt(request.rinfo), request.forknum);
|
||||
}
|
||||
@@ -2159,14 +2058,14 @@ communicator_exists(NRelFileInfo rinfo, ForkNumber forkNum, neon_request_lsns *r
|
||||
{
|
||||
if (!equal_requests(resp, &request.hdr))
|
||||
{
|
||||
elog(WARNING, NEON_TAG "Error message {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X} doesn't match exists request {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X}",
|
||||
elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match exists request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}",
|
||||
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),
|
||||
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since));
|
||||
}
|
||||
}
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_IO_ERROR),
|
||||
errmsg(NEON_TAG "[reqid " UINT64_HEX_FORMAT "] could not read relation existence of rel %u/%u/%u.%u from page server at lsn %X/%08X",
|
||||
errmsg(NEON_TAG "[reqid %lx] could not read relation existence of rel %u/%u/%u.%u from page server at lsn %X/%08X",
|
||||
resp->reqid,
|
||||
RelFileInfoFmt(rinfo),
|
||||
forkNum,
|
||||
@@ -2328,7 +2227,6 @@ Retry:
|
||||
case T_NeonGetPageResponse:
|
||||
{
|
||||
NeonGetPageResponse* getpage_resp = (NeonGetPageResponse *) resp;
|
||||
check_page_lsn(getpage_resp);
|
||||
memcpy(buffer, getpage_resp->page, BLCKSZ);
|
||||
|
||||
/*
|
||||
@@ -2343,7 +2241,7 @@ Retry:
|
||||
case T_NeonErrorResponse:
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_IO_ERROR),
|
||||
errmsg(NEON_TAG "[shard %d, reqid " UINT64_HEX_FORMAT "] could not read block %u in rel %u/%u/%u.%u from page server at lsn %X/%08X",
|
||||
errmsg(NEON_TAG "[shard %d, reqid %lx] could not read block %u in rel %u/%u/%u.%u from page server at lsn %X/%08X",
|
||||
slot->shard_no, resp->reqid, blockno, RelFileInfoFmt(rinfo),
|
||||
forkNum, LSN_FORMAT_ARGS(reqlsns->effective_request_lsn)),
|
||||
errdetail("page server returned error: %s",
|
||||
@@ -2396,7 +2294,7 @@ communicator_nblocks(NRelFileInfo rinfo, ForkNumber forknum, neon_request_lsns *
|
||||
relsize_resp->req.forknum != forknum)
|
||||
{
|
||||
NEON_PANIC_CONNECTION_STATE(0, PANIC,
|
||||
"Unexpect response {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u} to get relsize request {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u}",
|
||||
"Unexpect response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u} to get relsize request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u}",
|
||||
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), RelFileInfoFmt(relsize_resp->req.rinfo), relsize_resp->req.forknum,
|
||||
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), RelFileInfoFmt(request.rinfo), forknum);
|
||||
}
|
||||
@@ -2409,14 +2307,14 @@ communicator_nblocks(NRelFileInfo rinfo, ForkNumber forknum, neon_request_lsns *
|
||||
{
|
||||
if (!equal_requests(resp, &request.hdr))
|
||||
{
|
||||
elog(WARNING, NEON_TAG "Error message {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X} doesn't match get relsize request {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X}",
|
||||
elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match get relsize request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}",
|
||||
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),
|
||||
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since));
|
||||
}
|
||||
}
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_IO_ERROR),
|
||||
errmsg(NEON_TAG "[reqid " UINT64_HEX_FORMAT "] could not read relation size of rel %u/%u/%u.%u from page server at lsn %X/%08X",
|
||||
errmsg(NEON_TAG "[reqid %lx] could not read relation size of rel %u/%u/%u.%u from page server at lsn %X/%08X",
|
||||
resp->reqid,
|
||||
RelFileInfoFmt(rinfo),
|
||||
forknum,
|
||||
@@ -2466,7 +2364,7 @@ communicator_dbsize(Oid dbNode, neon_request_lsns *request_lsns)
|
||||
dbsize_resp->req.dbNode != dbNode)
|
||||
{
|
||||
NEON_PANIC_CONNECTION_STATE(0, PANIC,
|
||||
"Unexpect response {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X, dbNode=%u} to get DB size request {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X, dbNode=%u}",
|
||||
"Unexpect response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, dbNode=%u} to get DB size request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, dbNode=%u}",
|
||||
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), dbsize_resp->req.dbNode,
|
||||
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), dbNode);
|
||||
}
|
||||
@@ -2479,14 +2377,14 @@ communicator_dbsize(Oid dbNode, neon_request_lsns *request_lsns)
|
||||
{
|
||||
if (!equal_requests(resp, &request.hdr))
|
||||
{
|
||||
elog(WARNING, NEON_TAG "Error message {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X} doesn't match get DB size request {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X}",
|
||||
elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match get DB size request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}",
|
||||
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),
|
||||
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since));
|
||||
}
|
||||
}
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_IO_ERROR),
|
||||
errmsg(NEON_TAG "[reqid " UINT64_HEX_FORMAT "] could not read db size of db %u from page server at lsn %X/%08X",
|
||||
errmsg(NEON_TAG "[reqid %lx] could not read db size of db %u from page server at lsn %X/%08X",
|
||||
resp->reqid,
|
||||
dbNode, LSN_FORMAT_ARGS(request_lsns->effective_request_lsn)),
|
||||
errdetail("page server returned error: %s",
|
||||
@@ -2526,18 +2424,15 @@ communicator_read_slru_segment(SlruKind kind, int64 segno, neon_request_lsns *re
|
||||
PG_TRY();
|
||||
{
|
||||
before_shmem_exit(prefetch_on_exit, Int32GetDatum(shard_no));
|
||||
minPrefetchLsn[MyProcNumber] = request_lsns->request_lsn;
|
||||
do
|
||||
{
|
||||
while (!page_server->send(shard_no, &request.hdr) || !page_server->flush(shard_no));
|
||||
resp = page_server->receive(shard_no);
|
||||
} while (resp == NULL);
|
||||
minPrefetchLsn[MyProcNumber] = InfiniteXLogRecPtr;
|
||||
cancel_before_shmem_exit(prefetch_on_exit, Int32GetDatum(shard_no));
|
||||
}
|
||||
PG_CATCH();
|
||||
{
|
||||
minPrefetchLsn[MyProcNumber] = InfiniteXLogRecPtr;
|
||||
cancel_before_shmem_exit(prefetch_on_exit, Int32GetDatum(shard_no));
|
||||
/* Nothing should cancel disconnect: we should not leave connection in opaque state */
|
||||
HOLD_INTERRUPTS();
|
||||
@@ -2560,7 +2455,7 @@ communicator_read_slru_segment(SlruKind kind, int64 segno, neon_request_lsns *re
|
||||
slru_resp->req.segno != segno)
|
||||
{
|
||||
NEON_PANIC_CONNECTION_STATE(0, PANIC,
|
||||
"Unexpect response {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X, kind=%u, segno=%u} to get SLRU segment request {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X, kind=%u, segno=%lluu}",
|
||||
"Unexpect response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, kind=%u, segno=%u} to get SLRU segment request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, kind=%u, segno=%lluu}",
|
||||
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), slru_resp->req.kind, slru_resp->req.segno,
|
||||
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), kind, (unsigned long long) segno);
|
||||
}
|
||||
@@ -2574,14 +2469,14 @@ communicator_read_slru_segment(SlruKind kind, int64 segno, neon_request_lsns *re
|
||||
{
|
||||
if (!equal_requests(resp, &request.hdr))
|
||||
{
|
||||
elog(WARNING, NEON_TAG "Error message {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X} doesn't match get SLRU segment request {reqid=" UINT64_HEX_FORMAT ",lsn=%X/%08X, since=%X/%08X}",
|
||||
elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match get SLRU segment request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}",
|
||||
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),
|
||||
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since));
|
||||
}
|
||||
}
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_IO_ERROR),
|
||||
errmsg(NEON_TAG "[reqid " UINT64_HEX_FORMAT "] could not read SLRU %d segment %llu at lsn %X/%08X",
|
||||
errmsg(NEON_TAG "[reqid %lx] could not read SLRU %d segment %llu at lsn %X/%08X",
|
||||
resp->reqid,
|
||||
kind,
|
||||
(unsigned long long) segno,
|
||||
@@ -2682,19 +2577,3 @@ communicator_processinterrupts(void)
|
||||
|
||||
return prev_interrupt_cb();
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(neon_communicator_min_inflight_request_lsn);
|
||||
|
||||
Datum
|
||||
neon_communicator_min_inflight_request_lsn(PG_FUNCTION_ARGS)
|
||||
{
|
||||
XLogRecPtr min_lsn = RecoveryInProgress()
|
||||
? GetXLogReplayRecPtr(NULL)
|
||||
: InfiniteXLogRecPtr;
|
||||
size_t n_procs = ProcGlobal->allProcCount;
|
||||
for (size_t i = 0; i < n_procs; i++)
|
||||
{
|
||||
min_lsn = Min(min_lsn, minPrefetchLsn[i]);
|
||||
}
|
||||
PG_RETURN_INT64(min_lsn);
|
||||
}
|
||||
|
||||
@@ -46,4 +46,5 @@ extern int communicator_read_slru_segment(SlruKind kind, int64 segno,
|
||||
extern void communicator_reconfigure_timeout_if_needed(void);
|
||||
extern void communicator_prefetch_pump_state(void);
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -162,34 +162,8 @@ typedef struct FileCacheControl
|
||||
dlist_head lru; /* double linked list for LRU replacement
|
||||
* algorithm */
|
||||
dlist_head holes; /* double linked list of punched holes */
|
||||
|
||||
HyperLogLogState wss_estimation; /* estimation of working set size */
|
||||
ConditionVariable cv[N_COND_VARS]; /* turnstile of condition variables */
|
||||
|
||||
/*
|
||||
* Estimation of working set size.
|
||||
*
|
||||
* This is not guarded by the lock. No locking is needed because all the
|
||||
* writes to the "registers" are simple 64-bit stores, to update a
|
||||
* timestamp. We assume that:
|
||||
*
|
||||
* - 64-bit stores are atomic. We could enforce that by using
|
||||
* pg_atomic_uint64 instead of TimestampTz as the datatype in hll.h, but
|
||||
* for now we just rely on it implicitly.
|
||||
*
|
||||
* - Even if they're not, and there is a race between two stores, it
|
||||
* doesn't matter much which one wins because they're both updating the
|
||||
* register with the current timestamp. Or you have a race between
|
||||
* resetting the register and updating it, in which case it also doesn't
|
||||
* matter much which one wins.
|
||||
*
|
||||
* - If they're not atomic, you might get an occasional "torn write" if
|
||||
* you're really unlucky, but we tolerate that too. It just means that
|
||||
* the estimate will be a little off, until the register is updated
|
||||
* again.
|
||||
*/
|
||||
HyperLogLogState wss_estimation;
|
||||
|
||||
/* Prewarmer state */
|
||||
PrewarmWorkerState prewarm_workers[MAX_PREWARM_WORKERS];
|
||||
size_t n_prewarm_workers;
|
||||
size_t n_prewarm_entries;
|
||||
@@ -219,6 +193,10 @@ static char *lfc_path;
|
||||
static uint64 lfc_generation;
|
||||
static FileCacheControl *lfc_ctl;
|
||||
static bool lfc_do_prewarm;
|
||||
static shmem_startup_hook_type prev_shmem_startup_hook;
|
||||
#if PG_VERSION_NUM>=150000
|
||||
static shmem_request_hook_type prev_shmem_request_hook;
|
||||
#endif
|
||||
|
||||
bool lfc_store_prefetch_result;
|
||||
bool lfc_prewarm_update_ws_estimation;
|
||||
@@ -227,8 +205,6 @@ bool AmPrewarmWorker;
|
||||
|
||||
#define LFC_ENABLED() (lfc_ctl->limit != 0)
|
||||
|
||||
PGDLLEXPORT void lfc_prewarm_main(Datum main_arg);
|
||||
|
||||
/*
|
||||
* Close LFC file if opened.
|
||||
* All backends should close their LFC files once LFC is disabled.
|
||||
@@ -338,14 +314,18 @@ lfc_ensure_opened(void)
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
LfcShmemInit(void)
|
||||
static void
|
||||
lfc_shmem_startup(void)
|
||||
{
|
||||
bool found;
|
||||
static HASHCTL info;
|
||||
|
||||
if (lfc_max_size <= 0)
|
||||
return;
|
||||
if (prev_shmem_startup_hook)
|
||||
{
|
||||
prev_shmem_startup_hook();
|
||||
}
|
||||
|
||||
LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
|
||||
|
||||
lfc_ctl = (FileCacheControl *) ShmemInitStruct("lfc", sizeof(FileCacheControl), &found);
|
||||
if (!found)
|
||||
@@ -390,16 +370,19 @@ LfcShmemInit(void)
|
||||
ConditionVariableInit(&lfc_ctl->cv[i]);
|
||||
|
||||
}
|
||||
LWLockRelease(AddinShmemInitLock);
|
||||
}
|
||||
|
||||
void
|
||||
LfcShmemRequest(void)
|
||||
static void
|
||||
lfc_shmem_request(void)
|
||||
{
|
||||
if (lfc_max_size > 0)
|
||||
{
|
||||
RequestAddinShmemSpace(sizeof(FileCacheControl) + hash_estimate_size(SIZE_MB_TO_CHUNKS(lfc_max_size) + 1, FILE_CACHE_ENRTY_SIZE));
|
||||
RequestNamedLWLockTranche("lfc_lock", 1);
|
||||
}
|
||||
#if PG_VERSION_NUM>=150000
|
||||
if (prev_shmem_request_hook)
|
||||
prev_shmem_request_hook();
|
||||
#endif
|
||||
|
||||
RequestAddinShmemSpace(sizeof(FileCacheControl) + hash_estimate_size(SIZE_MB_TO_CHUNKS(lfc_max_size) + 1, FILE_CACHE_ENRTY_SIZE));
|
||||
RequestNamedLWLockTranche("lfc_lock", 1);
|
||||
}
|
||||
|
||||
static bool
|
||||
@@ -631,6 +614,18 @@ lfc_init(void)
|
||||
NULL,
|
||||
NULL,
|
||||
NULL);
|
||||
|
||||
if (lfc_max_size == 0)
|
||||
return;
|
||||
|
||||
prev_shmem_startup_hook = shmem_startup_hook;
|
||||
shmem_startup_hook = lfc_shmem_startup;
|
||||
#if PG_VERSION_NUM>=150000
|
||||
prev_shmem_request_hook = shmem_request_hook;
|
||||
shmem_request_hook = lfc_shmem_request;
|
||||
#else
|
||||
lfc_shmem_request();
|
||||
#endif
|
||||
}
|
||||
|
||||
FileCacheState*
|
||||
@@ -1147,13 +1142,6 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
|
||||
CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
|
||||
|
||||
/* Update working set size estimate for the blocks */
|
||||
for (int i = 0; i < nblocks; i++)
|
||||
{
|
||||
tag.blockNum = blkno + i;
|
||||
addSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));
|
||||
}
|
||||
|
||||
/*
|
||||
* For every chunk that has blocks we're interested in, we
|
||||
* 1. get the chunk header
|
||||
@@ -1232,6 +1220,14 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
}
|
||||
|
||||
entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
|
||||
|
||||
/* Approximate working set for the blocks assumed in this entry */
|
||||
for (int i = 0; i < blocks_in_chunk; i++)
|
||||
{
|
||||
tag.blockNum = blkno + i;
|
||||
addSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));
|
||||
}
|
||||
|
||||
if (entry == NULL)
|
||||
{
|
||||
/* Pages are not cached */
|
||||
@@ -1508,15 +1504,9 @@ lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
|
||||
return false;
|
||||
|
||||
CopyNRelFileInfoToBufTag(tag, rinfo);
|
||||
CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
|
||||
tag.forkNum = forknum;
|
||||
|
||||
/* Update working set size estimate for the blocks */
|
||||
if (lfc_prewarm_update_ws_estimation)
|
||||
{
|
||||
tag.blockNum = blkno;
|
||||
addSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));
|
||||
}
|
||||
CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
|
||||
|
||||
tag.blockNum = blkno - chunk_offs;
|
||||
hash = get_hash_value(lfc_hash, &tag);
|
||||
@@ -1534,13 +1524,19 @@ lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
|
||||
|
||||
if (lwlsn > lsn)
|
||||
{
|
||||
elog(DEBUG1, "Skip LFC write for %u because LwLSN=%X/%X is greater than not_nodified_since LSN %X/%X",
|
||||
elog(DEBUG1, "Skip LFC write for %d because LwLSN=%X/%X is greater than not_nodified_since LSN %X/%X",
|
||||
blkno, LSN_FORMAT_ARGS(lwlsn), LSN_FORMAT_ARGS(lsn));
|
||||
LWLockRelease(lfc_lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_ENTER, &found);
|
||||
|
||||
if (lfc_prewarm_update_ws_estimation)
|
||||
{
|
||||
tag.blockNum = blkno;
|
||||
addSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));
|
||||
}
|
||||
if (found)
|
||||
{
|
||||
state = GET_STATE(entry, chunk_offs);
|
||||
@@ -1653,15 +1649,9 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
return;
|
||||
|
||||
CopyNRelFileInfoToBufTag(tag, rinfo);
|
||||
CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
|
||||
tag.forkNum = forkNum;
|
||||
|
||||
/* Update working set size estimate for the blocks */
|
||||
for (int i = 0; i < nblocks; i++)
|
||||
{
|
||||
tag.blockNum = blkno + i;
|
||||
addSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));
|
||||
}
|
||||
CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
|
||||
|
||||
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
|
||||
|
||||
@@ -1702,6 +1692,14 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
cv = &lfc_ctl->cv[hash % N_COND_VARS];
|
||||
|
||||
entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_ENTER, &found);
|
||||
|
||||
/* Approximate working set for the blocks assumed in this entry */
|
||||
for (int i = 0; i < blocks_in_chunk; i++)
|
||||
{
|
||||
tag.blockNum = blkno + i;
|
||||
addSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));
|
||||
}
|
||||
|
||||
if (found)
|
||||
{
|
||||
/*
|
||||
@@ -2137,23 +2135,40 @@ local_cache_pages(PG_FUNCTION_ARGS)
|
||||
SRF_RETURN_DONE(funcctx);
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(approximate_working_set_size_seconds);
|
||||
|
||||
/*
|
||||
* Internal implementation of the approximate_working_set_size_seconds()
|
||||
* function.
|
||||
*/
|
||||
int32
|
||||
lfc_approximate_working_set_size_seconds(time_t duration, bool reset)
|
||||
Datum
|
||||
approximate_working_set_size_seconds(PG_FUNCTION_ARGS)
|
||||
{
|
||||
int32 dc;
|
||||
if (lfc_size_limit != 0)
|
||||
{
|
||||
int32 dc;
|
||||
time_t duration = PG_ARGISNULL(0) ? (time_t)-1 : PG_GETARG_INT32(0);
|
||||
LWLockAcquire(lfc_lock, LW_SHARED);
|
||||
dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, duration);
|
||||
LWLockRelease(lfc_lock);
|
||||
PG_RETURN_INT32(dc);
|
||||
}
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
|
||||
if (lfc_size_limit == 0)
|
||||
return -1;
|
||||
PG_FUNCTION_INFO_V1(approximate_working_set_size);
|
||||
|
||||
dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, duration);
|
||||
if (reset)
|
||||
memset(lfc_ctl->wss_estimation.regs, 0, sizeof lfc_ctl->wss_estimation.regs);
|
||||
return dc;
|
||||
Datum
|
||||
approximate_working_set_size(PG_FUNCTION_ARGS)
|
||||
{
|
||||
if (lfc_size_limit != 0)
|
||||
{
|
||||
int32 dc;
|
||||
bool reset = PG_GETARG_BOOL(0);
|
||||
LWLockAcquire(lfc_lock, reset ? LW_EXCLUSIVE : LW_SHARED);
|
||||
dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, (time_t)-1);
|
||||
if (reset)
|
||||
memset(lfc_ctl->wss_estimation.regs, 0, sizeof lfc_ctl->wss_estimation.regs);
|
||||
LWLockRelease(lfc_lock);
|
||||
PG_RETURN_INT32(dc);
|
||||
}
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(get_local_cache_state);
|
||||
|
||||
@@ -47,8 +47,7 @@ extern bool lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blk
|
||||
extern FileCacheState* lfc_get_state(size_t max_entries);
|
||||
extern void lfc_prewarm(FileCacheState* fcs, uint32 n_workers);
|
||||
|
||||
extern int32 lfc_approximate_working_set_size_seconds(time_t duration, bool reset);
|
||||
|
||||
PGDLLEXPORT void lfc_prewarm_main(Datum main_arg);
|
||||
|
||||
static inline bool
|
||||
lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
|
||||
@@ -118,6 +118,10 @@ typedef struct
|
||||
ShardMap shard_map;
|
||||
} PagestoreShmemState;
|
||||
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
static shmem_request_hook_type prev_shmem_request_hook = NULL;
|
||||
#endif
|
||||
static shmem_startup_hook_type prev_shmem_startup_hook;
|
||||
static PagestoreShmemState *pagestore_shared;
|
||||
static uint64 pagestore_local_counter = 0;
|
||||
|
||||
@@ -1280,12 +1284,18 @@ check_neon_id(char **newval, void **extra, GucSource source)
|
||||
return **newval == '\0' || HexDecodeString(id, *newval, 16);
|
||||
}
|
||||
|
||||
static Size
|
||||
PagestoreShmemSize(void)
|
||||
{
|
||||
return add_size(sizeof(PagestoreShmemState), NeonPerfCountersShmemSize());
|
||||
}
|
||||
|
||||
void
|
||||
static bool
|
||||
PagestoreShmemInit(void)
|
||||
{
|
||||
bool found;
|
||||
|
||||
LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
|
||||
pagestore_shared = ShmemInitStruct("libpagestore shared state",
|
||||
sizeof(PagestoreShmemState),
|
||||
&found);
|
||||
@@ -1296,12 +1306,44 @@ PagestoreShmemInit(void)
|
||||
memset(&pagestore_shared->shard_map, 0, sizeof(ShardMap));
|
||||
AssignPageserverConnstring(page_server_connstring, NULL);
|
||||
}
|
||||
|
||||
NeonPerfCountersShmemInit();
|
||||
|
||||
LWLockRelease(AddinShmemInitLock);
|
||||
return found;
|
||||
}
|
||||
|
||||
void
|
||||
PagestoreShmemRequest(void)
|
||||
static void
|
||||
pagestore_shmem_startup_hook(void)
|
||||
{
|
||||
RequestAddinShmemSpace(sizeof(PagestoreShmemState));
|
||||
if (prev_shmem_startup_hook)
|
||||
prev_shmem_startup_hook();
|
||||
|
||||
PagestoreShmemInit();
|
||||
}
|
||||
|
||||
static void
|
||||
pagestore_shmem_request(void)
|
||||
{
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
if (prev_shmem_request_hook)
|
||||
prev_shmem_request_hook();
|
||||
#endif
|
||||
|
||||
RequestAddinShmemSpace(PagestoreShmemSize());
|
||||
}
|
||||
|
||||
static void
|
||||
pagestore_prepare_shmem(void)
|
||||
{
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
prev_shmem_request_hook = shmem_request_hook;
|
||||
shmem_request_hook = pagestore_shmem_request;
|
||||
#else
|
||||
pagestore_shmem_request();
|
||||
#endif
|
||||
prev_shmem_startup_hook = shmem_startup_hook;
|
||||
shmem_startup_hook = pagestore_shmem_startup_hook;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1310,6 +1352,8 @@ PagestoreShmemRequest(void)
|
||||
void
|
||||
pg_init_libpagestore(void)
|
||||
{
|
||||
pagestore_prepare_shmem();
|
||||
|
||||
DefineCustomStringVariable("neon.pageserver_connstring",
|
||||
"connection string to the page server",
|
||||
NULL,
|
||||
@@ -1460,6 +1504,8 @@ pg_init_libpagestore(void)
|
||||
0,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
relsize_hash_init();
|
||||
|
||||
if (page_server != NULL)
|
||||
neon_log(ERROR, "libpagestore already loaded");
|
||||
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
create function neon_communicator_min_inflight_request_lsn() returns pg_catalog.pg_lsn
|
||||
AS 'MODULE_PATHNAME', 'neon_communicator_min_inflight_request_lsn'
|
||||
LANGUAGE C;
|
||||
@@ -1 +0,0 @@
|
||||
drop function neon_communicator_min_inflight_request_lsn();
|
||||
@@ -22,7 +22,6 @@
|
||||
#include "replication/slot.h"
|
||||
#include "replication/walsender.h"
|
||||
#include "storage/proc.h"
|
||||
#include "storage/ipc.h"
|
||||
#include "funcapi.h"
|
||||
#include "access/htup_details.h"
|
||||
#include "utils/builtins.h"
|
||||
@@ -60,15 +59,11 @@ static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
|
||||
static void neon_ExecutorStart(QueryDesc *queryDesc, int eflags);
|
||||
static void neon_ExecutorEnd(QueryDesc *queryDesc);
|
||||
|
||||
#if PG_MAJORVERSION_NUM >= 16
|
||||
static shmem_startup_hook_type prev_shmem_startup_hook;
|
||||
|
||||
static void neon_shmem_startup_hook(void);
|
||||
static void neon_shmem_request_hook(void);
|
||||
|
||||
#if PG_MAJORVERSION_NUM >= 15
|
||||
static shmem_request_hook_type prev_shmem_request_hook = NULL;
|
||||
#endif
|
||||
|
||||
|
||||
#if PG_MAJORVERSION_NUM >= 17
|
||||
uint32 WAIT_EVENT_NEON_LFC_MAINTENANCE;
|
||||
uint32 WAIT_EVENT_NEON_LFC_READ;
|
||||
@@ -455,13 +450,15 @@ _PG_init(void)
|
||||
*/
|
||||
#if PG_VERSION_NUM >= 160000
|
||||
load_file("$libdir/neon_rmgr", false);
|
||||
|
||||
prev_shmem_startup_hook = shmem_startup_hook;
|
||||
shmem_startup_hook = neon_shmem_startup_hook;
|
||||
#endif
|
||||
|
||||
/* dummy call to a Rust function in the communicator library, to check that it works */
|
||||
(void) communicator_dummy(123);
|
||||
|
||||
pg_init_libpagestore();
|
||||
relsize_hash_init();
|
||||
lfc_init();
|
||||
pg_init_walproposer();
|
||||
init_lwlsncache();
|
||||
@@ -555,16 +552,6 @@ _PG_init(void)
|
||||
|
||||
ReportSearchPath();
|
||||
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
prev_shmem_request_hook = shmem_request_hook;
|
||||
shmem_request_hook = neon_shmem_request_hook;
|
||||
#else
|
||||
neon_shmem_request_hook();
|
||||
#endif
|
||||
prev_shmem_startup_hook = shmem_startup_hook;
|
||||
shmem_startup_hook = neon_shmem_startup_hook;
|
||||
|
||||
|
||||
prev_ExecutorStart = ExecutorStart_hook;
|
||||
ExecutorStart_hook = neon_ExecutorStart;
|
||||
prev_ExecutorEnd = ExecutorEnd_hook;
|
||||
@@ -574,8 +561,6 @@ _PG_init(void)
|
||||
PG_FUNCTION_INFO_V1(pg_cluster_size);
|
||||
PG_FUNCTION_INFO_V1(backpressure_lsns);
|
||||
PG_FUNCTION_INFO_V1(backpressure_throttling_time);
|
||||
PG_FUNCTION_INFO_V1(approximate_working_set_size_seconds);
|
||||
PG_FUNCTION_INFO_V1(approximate_working_set_size);
|
||||
|
||||
Datum
|
||||
pg_cluster_size(PG_FUNCTION_ARGS)
|
||||
@@ -622,52 +607,7 @@ backpressure_throttling_time(PG_FUNCTION_ARGS)
|
||||
PG_RETURN_UINT64(BackpressureThrottlingTime());
|
||||
}
|
||||
|
||||
Datum
|
||||
approximate_working_set_size_seconds(PG_FUNCTION_ARGS)
|
||||
{
|
||||
time_t duration;
|
||||
int32 dc;
|
||||
|
||||
duration = PG_ARGISNULL(0) ? (time_t) -1 : PG_GETARG_INT32(0);
|
||||
|
||||
dc = lfc_approximate_working_set_size_seconds(duration, false);
|
||||
if (dc < 0)
|
||||
PG_RETURN_NULL();
|
||||
else
|
||||
PG_RETURN_INT32(dc);
|
||||
}
|
||||
|
||||
Datum
|
||||
approximate_working_set_size(PG_FUNCTION_ARGS)
|
||||
{
|
||||
bool reset = PG_GETARG_BOOL(0);
|
||||
int32 dc;
|
||||
|
||||
dc = lfc_approximate_working_set_size_seconds(-1, reset);
|
||||
if (dc < 0)
|
||||
PG_RETURN_NULL();
|
||||
else
|
||||
PG_RETURN_INT32(dc);
|
||||
}
|
||||
|
||||
static void
|
||||
neon_shmem_request_hook(void)
|
||||
{
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
if (prev_shmem_request_hook)
|
||||
prev_shmem_request_hook();
|
||||
#endif
|
||||
|
||||
LfcShmemRequest();
|
||||
NeonPerfCountersShmemRequest();
|
||||
PagestoreShmemRequest();
|
||||
RelsizeCacheShmemRequest();
|
||||
CommunicatorShmemRequest();
|
||||
WalproposerShmemRequest();
|
||||
LwLsnCacheShmemRequest();
|
||||
}
|
||||
|
||||
|
||||
#if PG_MAJORVERSION_NUM >= 16
|
||||
static void
|
||||
neon_shmem_startup_hook(void)
|
||||
{
|
||||
@@ -675,16 +615,6 @@ neon_shmem_startup_hook(void)
|
||||
if (prev_shmem_startup_hook)
|
||||
prev_shmem_startup_hook();
|
||||
|
||||
LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
|
||||
|
||||
LfcShmemInit();
|
||||
NeonPerfCountersShmemInit();
|
||||
PagestoreShmemInit();
|
||||
RelsizeCacheShmemInit();
|
||||
CommunicatorShmemInit();
|
||||
WalproposerShmemInit();
|
||||
LwLsnCacheShmemInit();
|
||||
|
||||
#if PG_MAJORVERSION_NUM >= 17
|
||||
WAIT_EVENT_NEON_LFC_MAINTENANCE = WaitEventExtensionNew("Neon/FileCache_Maintenance");
|
||||
WAIT_EVENT_NEON_LFC_READ = WaitEventExtensionNew("Neon/FileCache_Read");
|
||||
@@ -697,9 +627,8 @@ neon_shmem_startup_hook(void)
|
||||
WAIT_EVENT_NEON_PS_READ = WaitEventExtensionNew("Neon/PS_ReadIO");
|
||||
WAIT_EVENT_NEON_WAL_DL = WaitEventExtensionNew("Neon/WAL_Download");
|
||||
#endif
|
||||
|
||||
LWLockRelease(AddinShmemInitLock);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* ExecutorStart hook: start up tracking if needed
|
||||
|
||||
@@ -58,7 +58,6 @@ extern uint32 WAIT_EVENT_NEON_WAL_DL;
|
||||
(errmsg(NEON_TAG "[shard %d] " fmt, shard_no, ##__VA_ARGS__), \
|
||||
errhidestmt(true), errhidecontext(true), errposition(0), internalerrposition(0)))
|
||||
|
||||
#define InfiniteXLogRecPtr UINT64_MAX
|
||||
|
||||
extern void pg_init_libpagestore(void);
|
||||
extern void pg_init_walproposer(void);
|
||||
@@ -72,21 +71,4 @@ extern PGDLLEXPORT void WalProposerSync(int argc, char *argv[]);
|
||||
extern PGDLLEXPORT void WalProposerMain(Datum main_arg);
|
||||
extern PGDLLEXPORT void LogicalSlotsMonitorMain(Datum main_arg);
|
||||
|
||||
extern void LfcShmemRequest(void);
|
||||
extern void PagestoreShmemRequest(void);
|
||||
extern void RelsizeCacheShmemRequest(void);
|
||||
extern void CommunicatorShmemRequest(void);
|
||||
extern void WalproposerShmemRequest(void);
|
||||
extern void LwLsnCacheShmemRequest(void);
|
||||
extern void NeonPerfCountersShmemRequest(void);
|
||||
|
||||
extern void LfcShmemInit(void);
|
||||
extern void PagestoreShmemInit(void);
|
||||
extern void RelsizeCacheShmemInit(void);
|
||||
extern void CommunicatorShmemInit(void);
|
||||
extern void WalproposerShmemInit(void);
|
||||
extern void LwLsnCacheShmemInit(void);
|
||||
extern void NeonPerfCountersShmemInit(void);
|
||||
|
||||
|
||||
#endif /* NEON_H */
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
#include "postgres.h"
|
||||
|
||||
#include "neon.h"
|
||||
#include "neon_lwlsncache.h"
|
||||
|
||||
#include "miscadmin.h"
|
||||
@@ -82,6 +81,14 @@ static set_max_lwlsn_hook_type prev_set_max_lwlsn_hook = NULL;
|
||||
static set_lwlsn_relation_hook_type prev_set_lwlsn_relation_hook = NULL;
|
||||
static set_lwlsn_db_hook_type prev_set_lwlsn_db_hook = NULL;
|
||||
|
||||
static shmem_startup_hook_type prev_shmem_startup_hook;
|
||||
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
static shmem_request_hook_type prev_shmem_request_hook;
|
||||
#endif
|
||||
|
||||
static void shmemrequest(void);
|
||||
static void shmeminit(void);
|
||||
static void neon_set_max_lwlsn(XLogRecPtr lsn);
|
||||
|
||||
void
|
||||
@@ -92,6 +99,16 @@ init_lwlsncache(void)
|
||||
|
||||
lwlc_register_gucs();
|
||||
|
||||
prev_shmem_startup_hook = shmem_startup_hook;
|
||||
shmem_startup_hook = shmeminit;
|
||||
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
prev_shmem_request_hook = shmem_request_hook;
|
||||
shmem_request_hook = shmemrequest;
|
||||
#else
|
||||
shmemrequest();
|
||||
#endif
|
||||
|
||||
prev_set_lwlsn_block_range_hook = set_lwlsn_block_range_hook;
|
||||
set_lwlsn_block_range_hook = neon_set_lwlsn_block_range;
|
||||
prev_set_lwlsn_block_v_hook = set_lwlsn_block_v_hook;
|
||||
@@ -107,19 +124,20 @@ init_lwlsncache(void)
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
LwLsnCacheShmemRequest(void)
|
||||
{
|
||||
static void shmemrequest(void) {
|
||||
Size requested_size = sizeof(LwLsnCacheCtl);
|
||||
|
||||
|
||||
requested_size += hash_estimate_size(lwlsn_cache_size, sizeof(LastWrittenLsnCacheEntry));
|
||||
|
||||
RequestAddinShmemSpace(requested_size);
|
||||
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
if (prev_shmem_request_hook)
|
||||
prev_shmem_request_hook();
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
LwLsnCacheShmemInit(void)
|
||||
{
|
||||
static void shmeminit(void) {
|
||||
static HASHCTL info;
|
||||
bool found;
|
||||
if (lwlsn_cache_size > 0)
|
||||
@@ -139,6 +157,9 @@ LwLsnCacheShmemInit(void)
|
||||
}
|
||||
dlist_init(&LwLsnCache->lastWrittenLsnLRU);
|
||||
LwLsnCache->maxLastWrittenLsn = GetRedoRecPtr();
|
||||
if (prev_shmem_startup_hook) {
|
||||
prev_shmem_startup_hook();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -17,21 +17,22 @@
|
||||
#include "storage/shmem.h"
|
||||
#include "utils/builtins.h"
|
||||
|
||||
#include "neon.h"
|
||||
#include "neon_perf_counters.h"
|
||||
#include "neon_pgversioncompat.h"
|
||||
|
||||
neon_per_backend_counters *neon_per_backend_counters_shared;
|
||||
|
||||
void
|
||||
NeonPerfCountersShmemRequest(void)
|
||||
Size
|
||||
NeonPerfCountersShmemSize(void)
|
||||
{
|
||||
Size size = mul_size(NUM_NEON_PERF_COUNTER_SLOTS, sizeof(neon_per_backend_counters));
|
||||
RequestAddinShmemSpace(size);
|
||||
Size size = 0;
|
||||
|
||||
size = add_size(size, mul_size(NUM_NEON_PERF_COUNTER_SLOTS,
|
||||
sizeof(neon_per_backend_counters)));
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void
|
||||
NeonPerfCountersShmemInit(void)
|
||||
{
|
||||
|
||||
@@ -165,8 +165,4 @@ extern void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags);
|
||||
extern TimeLineID GetWALInsertionTimeLine(void);
|
||||
#endif
|
||||
|
||||
/* format codes not present in PG17-; but available in PG18+ */
|
||||
#define INT64_HEX_FORMAT "%" INT64_MODIFIER "x"
|
||||
#define UINT64_HEX_FORMAT "%" INT64_MODIFIER "x"
|
||||
|
||||
#endif /* NEON_PGVERSIONCOMPAT_H */
|
||||
|
||||
@@ -250,6 +250,7 @@ extern const f_smgr *smgr_neon(ProcNumber backend, NRelFileInfo rinfo);
|
||||
extern void smgr_init_neon(void);
|
||||
extern void readahead_buffer_resize(int newsize, void *extra);
|
||||
|
||||
|
||||
/*
|
||||
* LSN values associated with each request to the pageserver
|
||||
*/
|
||||
|
||||
@@ -675,7 +675,7 @@ neon_get_request_lsns(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
|
||||
* always have that problem as the can always lag behind the
|
||||
* primary, but for the primary we can avoid it by always
|
||||
* requesting the latest page, by setting request LSN to
|
||||
* InfiniteXLogRecPtr.
|
||||
* UINT64_MAX.
|
||||
*
|
||||
* effective_request_lsn is used to check that received response is still valid.
|
||||
* In case of primary node it is last written LSN. Originally we used flush_lsn here,
|
||||
@@ -703,7 +703,7 @@ neon_get_request_lsns(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
|
||||
* The problem can be fixed by callingGetFlushRecPtr() before checking if the page is in the buffer cache.
|
||||
* But you can't do that within smgrprefetch(), would need to modify the caller.
|
||||
*/
|
||||
result->request_lsn = InfiniteXLogRecPtr;
|
||||
result->request_lsn = UINT64_MAX;
|
||||
result->not_modified_since = last_written_lsn;
|
||||
result->effective_request_lsn = last_written_lsn;
|
||||
}
|
||||
@@ -2158,7 +2158,7 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf
|
||||
request_lsn = nm_adjust_lsn(request_lsn);
|
||||
}
|
||||
else
|
||||
request_lsn = InfiniteXLogRecPtr;
|
||||
request_lsn = UINT64_MAX;
|
||||
|
||||
/*
|
||||
* GetRedoStartLsn() returns LSN of the basebackup. We know that the SLRU
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "neon.h"
|
||||
#include "neon_pgversioncompat.h"
|
||||
|
||||
#include "pagestore_client.h"
|
||||
@@ -50,23 +49,32 @@ typedef struct
|
||||
* algorithm */
|
||||
} RelSizeHashControl;
|
||||
|
||||
static HTAB *relsize_hash;
|
||||
static LWLockId relsize_lock;
|
||||
static int relsize_hash_size;
|
||||
static RelSizeHashControl* relsize_ctl;
|
||||
static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
static shmem_request_hook_type prev_shmem_request_hook = NULL;
|
||||
static void relsize_shmem_request(void);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Size of a cache entry is 36 bytes. So this default will take about 2.3 MB,
|
||||
* which seems reasonable.
|
||||
*/
|
||||
#define DEFAULT_RELSIZE_HASH_SIZE (64 * 1024)
|
||||
|
||||
static HTAB *relsize_hash;
|
||||
static LWLockId relsize_lock;
|
||||
static int relsize_hash_size = DEFAULT_RELSIZE_HASH_SIZE;
|
||||
static RelSizeHashControl* relsize_ctl;
|
||||
|
||||
void
|
||||
RelsizeCacheShmemInit(void)
|
||||
static void
|
||||
neon_smgr_shmem_startup(void)
|
||||
{
|
||||
static HASHCTL info;
|
||||
bool found;
|
||||
|
||||
if (prev_shmem_startup_hook)
|
||||
prev_shmem_startup_hook();
|
||||
|
||||
LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
|
||||
relsize_ctl = (RelSizeHashControl *) ShmemInitStruct("relsize_hash", sizeof(RelSizeHashControl), &found);
|
||||
if (!found)
|
||||
{
|
||||
@@ -77,6 +85,7 @@ RelsizeCacheShmemInit(void)
|
||||
relsize_hash_size, relsize_hash_size,
|
||||
&info,
|
||||
HASH_ELEM | HASH_BLOBS);
|
||||
LWLockRelease(AddinShmemInitLock);
|
||||
relsize_ctl->size = 0;
|
||||
relsize_ctl->hits = 0;
|
||||
relsize_ctl->misses = 0;
|
||||
@@ -233,15 +242,34 @@ relsize_hash_init(void)
|
||||
PGC_POSTMASTER,
|
||||
0,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
if (relsize_hash_size > 0)
|
||||
{
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
prev_shmem_request_hook = shmem_request_hook;
|
||||
shmem_request_hook = relsize_shmem_request;
|
||||
#else
|
||||
RequestAddinShmemSpace(hash_estimate_size(relsize_hash_size, sizeof(RelSizeEntry)));
|
||||
RequestNamedLWLockTranche("neon_relsize", 1);
|
||||
#endif
|
||||
|
||||
prev_shmem_startup_hook = shmem_startup_hook;
|
||||
shmem_startup_hook = neon_smgr_shmem_startup;
|
||||
}
|
||||
}
|
||||
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
/*
|
||||
* shmem_request hook: request additional shared resources. We'll allocate or
|
||||
* attach to the shared resources in neon_smgr_shmem_startup().
|
||||
*/
|
||||
void
|
||||
RelsizeCacheShmemRequest(void)
|
||||
static void
|
||||
relsize_shmem_request(void)
|
||||
{
|
||||
if (prev_shmem_request_hook)
|
||||
prev_shmem_request_hook();
|
||||
|
||||
RequestAddinShmemSpace(sizeof(RelSizeHashControl) + hash_estimate_size(relsize_hash_size, sizeof(RelSizeEntry)));
|
||||
RequestNamedLWLockTranche("neon_relsize", 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -83,8 +83,10 @@ static XLogRecPtr standby_flush_lsn = InvalidXLogRecPtr;
|
||||
static XLogRecPtr standby_apply_lsn = InvalidXLogRecPtr;
|
||||
static HotStandbyFeedback agg_hs_feedback;
|
||||
|
||||
static void nwp_shmem_startup_hook(void);
|
||||
static void nwp_register_gucs(void);
|
||||
static void assign_neon_safekeepers(const char *newval, void *extra);
|
||||
static void nwp_prepare_shmem(void);
|
||||
static uint64 backpressure_lag_impl(void);
|
||||
static uint64 startup_backpressure_wrap(void);
|
||||
static bool backpressure_throttling_impl(void);
|
||||
@@ -97,6 +99,11 @@ static TimestampTz walprop_pg_get_current_timestamp(WalProposer *wp);
|
||||
static void walprop_pg_load_libpqwalreceiver(void);
|
||||
|
||||
static process_interrupts_callback_t PrevProcessInterruptsCallback = NULL;
|
||||
static shmem_startup_hook_type prev_shmem_startup_hook_type;
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
static shmem_request_hook_type prev_shmem_request_hook = NULL;
|
||||
static void walproposer_shmem_request(void);
|
||||
#endif
|
||||
static void WalproposerShmemInit_SyncSafekeeper(void);
|
||||
|
||||
|
||||
@@ -186,6 +193,8 @@ pg_init_walproposer(void)
|
||||
|
||||
nwp_register_gucs();
|
||||
|
||||
nwp_prepare_shmem();
|
||||
|
||||
delay_backend_us = &startup_backpressure_wrap;
|
||||
PrevProcessInterruptsCallback = ProcessInterruptsCallback;
|
||||
ProcessInterruptsCallback = backpressure_throttling_impl;
|
||||
@@ -473,11 +482,12 @@ WalproposerShmemSize(void)
|
||||
return sizeof(WalproposerShmemState);
|
||||
}
|
||||
|
||||
void
|
||||
static bool
|
||||
WalproposerShmemInit(void)
|
||||
{
|
||||
bool found;
|
||||
|
||||
LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
|
||||
walprop_shared = ShmemInitStruct("Walproposer shared state",
|
||||
sizeof(WalproposerShmemState),
|
||||
&found);
|
||||
@@ -494,6 +504,9 @@ WalproposerShmemInit(void)
|
||||
pg_atomic_init_u32(&walprop_shared->wal_rate_limiter.should_limit, 0);
|
||||
/* END_HADRON */
|
||||
}
|
||||
LWLockRelease(AddinShmemInitLock);
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -596,15 +609,42 @@ walprop_register_bgworker(void)
|
||||
|
||||
/* shmem handling */
|
||||
|
||||
static void
|
||||
nwp_prepare_shmem(void)
|
||||
{
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
prev_shmem_request_hook = shmem_request_hook;
|
||||
shmem_request_hook = walproposer_shmem_request;
|
||||
#else
|
||||
RequestAddinShmemSpace(WalproposerShmemSize());
|
||||
#endif
|
||||
prev_shmem_startup_hook_type = shmem_startup_hook;
|
||||
shmem_startup_hook = nwp_shmem_startup_hook;
|
||||
}
|
||||
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
/*
|
||||
* shmem_request hook: request additional shared resources. We'll allocate or
|
||||
* attach to the shared resources in WalproposerShmemInit().
|
||||
* attach to the shared resources in nwp_shmem_startup_hook().
|
||||
*/
|
||||
void
|
||||
WalproposerShmemRequest(void)
|
||||
static void
|
||||
walproposer_shmem_request(void)
|
||||
{
|
||||
if (prev_shmem_request_hook)
|
||||
prev_shmem_request_hook();
|
||||
|
||||
RequestAddinShmemSpace(WalproposerShmemSize());
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
nwp_shmem_startup_hook(void)
|
||||
{
|
||||
if (prev_shmem_startup_hook_type)
|
||||
prev_shmem_startup_hook_type();
|
||||
|
||||
WalproposerShmemInit();
|
||||
}
|
||||
|
||||
WalproposerShmemState *
|
||||
GetWalpropShmemState(void)
|
||||
|
||||
@@ -236,13 +236,13 @@ clear_buffer_cache(PG_FUNCTION_ARGS)
|
||||
bool save_neon_test_evict;
|
||||
|
||||
/*
|
||||
* Temporarily set the neon_test_evict GUC, so that when we pin and
|
||||
* Temporarily set the zenith_test_evict GUC, so that when we pin and
|
||||
* unpin a buffer, the buffer is evicted. We use that hack to evict all
|
||||
* buffers, as there is no explicit "evict this buffer" function in the
|
||||
* buffer manager.
|
||||
*/
|
||||
save_neon_test_evict = neon_test_evict;
|
||||
neon_test_evict = true;
|
||||
save_neon_test_evict = zenith_test_evict;
|
||||
zenith_test_evict = true;
|
||||
PG_TRY();
|
||||
{
|
||||
/* Scan through all the buffers */
|
||||
@@ -273,7 +273,7 @@ clear_buffer_cache(PG_FUNCTION_ARGS)
|
||||
|
||||
/*
|
||||
* Pin the buffer, and release it again. Because we have
|
||||
* neon_test_evict==true, this will evict the page from the
|
||||
* zenith_test_evict==true, this will evict the page from the
|
||||
* buffer cache if no one else is holding a pin on it.
|
||||
*/
|
||||
if (isvalid)
|
||||
@@ -286,7 +286,7 @@ clear_buffer_cache(PG_FUNCTION_ARGS)
|
||||
PG_FINALLY();
|
||||
{
|
||||
/* restore the GUC */
|
||||
neon_test_evict = save_neon_test_evict;
|
||||
zenith_test_evict = save_neon_test_evict;
|
||||
}
|
||||
PG_END_TRY();
|
||||
|
||||
|
||||
@@ -2953,17 +2953,17 @@ XmlTableBuilderData
|
||||
YYLTYPE
|
||||
YYSTYPE
|
||||
YY_BUFFER_STATE
|
||||
NeonErrorResponse
|
||||
NeonExistsRequest
|
||||
NeonExistsResponse
|
||||
NeonGetPageRequest
|
||||
NeonGetPageResponse
|
||||
NeonMessage
|
||||
NeonMessageTag
|
||||
NeonNblocksRequest
|
||||
NeonNblocksResponse
|
||||
NeonRequest
|
||||
NeonResponse
|
||||
ZenithErrorResponse
|
||||
ZenithExistsRequest
|
||||
ZenithExistsResponse
|
||||
ZenithGetPageRequest
|
||||
ZenithGetPageResponse
|
||||
ZenithMessage
|
||||
ZenithMessageTag
|
||||
ZenithNblocksRequest
|
||||
ZenithNblocksResponse
|
||||
ZenithRequest
|
||||
ZenithResponse
|
||||
_SPI_connection
|
||||
_SPI_plan
|
||||
__AssignProcessToJobObject
|
||||
|
||||
209
poetry.lock
generated
209
poetry.lock
generated
@@ -2,123 +2,127 @@
|
||||
|
||||
[[package]]
|
||||
name = "aiohappyeyeballs"
|
||||
version = "2.6.1"
|
||||
version = "2.3.5"
|
||||
description = "Happy Eyeballs for asyncio"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8"},
|
||||
{file = "aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558"},
|
||||
{file = "aiohappyeyeballs-2.3.5-py3-none-any.whl", hash = "sha256:4d6dea59215537dbc746e93e779caea8178c866856a721c9c660d7a5a7b8be03"},
|
||||
{file = "aiohappyeyeballs-2.3.5.tar.gz", hash = "sha256:6fa48b9f1317254f122a07a131a86b71ca6946ca989ce6326fff54a99a920105"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aiohttp"
|
||||
version = "3.12.14"
|
||||
version = "3.10.11"
|
||||
description = "Async http client/server framework (asyncio)"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "aiohttp-3.12.14-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:906d5075b5ba0dd1c66fcaaf60eb09926a9fef3ca92d912d2a0bbdbecf8b1248"},
|
||||
{file = "aiohttp-3.12.14-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c875bf6fc2fd1a572aba0e02ef4e7a63694778c5646cdbda346ee24e630d30fb"},
|
||||
{file = "aiohttp-3.12.14-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fbb284d15c6a45fab030740049d03c0ecd60edad9cd23b211d7e11d3be8d56fd"},
|
||||
{file = "aiohttp-3.12.14-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38e360381e02e1a05d36b223ecab7bc4a6e7b5ab15760022dc92589ee1d4238c"},
|
||||
{file = "aiohttp-3.12.14-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:aaf90137b5e5d84a53632ad95ebee5c9e3e7468f0aab92ba3f608adcb914fa95"},
|
||||
{file = "aiohttp-3.12.14-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e532a25e4a0a2685fa295a31acf65e027fbe2bea7a4b02cdfbbba8a064577663"},
|
||||
{file = "aiohttp-3.12.14-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eab9762c4d1b08ae04a6c77474e6136da722e34fdc0e6d6eab5ee93ac29f35d1"},
|
||||
{file = "aiohttp-3.12.14-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abe53c3812b2899889a7fca763cdfaeee725f5be68ea89905e4275476ffd7e61"},
|
||||
{file = "aiohttp-3.12.14-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5760909b7080aa2ec1d320baee90d03b21745573780a072b66ce633eb77a8656"},
|
||||
{file = "aiohttp-3.12.14-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:02fcd3f69051467bbaa7f84d7ec3267478c7df18d68b2e28279116e29d18d4f3"},
|
||||
{file = "aiohttp-3.12.14-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:4dcd1172cd6794884c33e504d3da3c35648b8be9bfa946942d353b939d5f1288"},
|
||||
{file = "aiohttp-3.12.14-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:224d0da41355b942b43ad08101b1b41ce633a654128ee07e36d75133443adcda"},
|
||||
{file = "aiohttp-3.12.14-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:e387668724f4d734e865c1776d841ed75b300ee61059aca0b05bce67061dcacc"},
|
||||
{file = "aiohttp-3.12.14-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:dec9cde5b5a24171e0b0a4ca064b1414950904053fb77c707efd876a2da525d8"},
|
||||
{file = "aiohttp-3.12.14-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:bbad68a2af4877cc103cd94af9160e45676fc6f0c14abb88e6e092b945c2c8e3"},
|
||||
{file = "aiohttp-3.12.14-cp310-cp310-win32.whl", hash = "sha256:ee580cb7c00bd857b3039ebca03c4448e84700dc1322f860cf7a500a6f62630c"},
|
||||
{file = "aiohttp-3.12.14-cp310-cp310-win_amd64.whl", hash = "sha256:cf4f05b8cea571e2ccc3ca744e35ead24992d90a72ca2cf7ab7a2efbac6716db"},
|
||||
{file = "aiohttp-3.12.14-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f4552ff7b18bcec18b60a90c6982049cdb9dac1dba48cf00b97934a06ce2e597"},
|
||||
{file = "aiohttp-3.12.14-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8283f42181ff6ccbcf25acaae4e8ab2ff7e92b3ca4a4ced73b2c12d8cd971393"},
|
||||
{file = "aiohttp-3.12.14-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:040afa180ea514495aaff7ad34ec3d27826eaa5d19812730fe9e529b04bb2179"},
|
||||
{file = "aiohttp-3.12.14-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b413c12f14c1149f0ffd890f4141a7471ba4b41234fe4fd4a0ff82b1dc299dbb"},
|
||||
{file = "aiohttp-3.12.14-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:1d6f607ce2e1a93315414e3d448b831238f1874b9968e1195b06efaa5c87e245"},
|
||||
{file = "aiohttp-3.12.14-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:565e70d03e924333004ed101599902bba09ebb14843c8ea39d657f037115201b"},
|
||||
{file = "aiohttp-3.12.14-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4699979560728b168d5ab63c668a093c9570af2c7a78ea24ca5212c6cdc2b641"},
|
||||
{file = "aiohttp-3.12.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad5fdf6af93ec6c99bf800eba3af9a43d8bfd66dce920ac905c817ef4a712afe"},
|
||||
{file = "aiohttp-3.12.14-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4ac76627c0b7ee0e80e871bde0d376a057916cb008a8f3ffc889570a838f5cc7"},
|
||||
{file = "aiohttp-3.12.14-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:798204af1180885651b77bf03adc903743a86a39c7392c472891649610844635"},
|
||||
{file = "aiohttp-3.12.14-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:4f1205f97de92c37dd71cf2d5bcfb65fdaed3c255d246172cce729a8d849b4da"},
|
||||
{file = "aiohttp-3.12.14-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:76ae6f1dd041f85065d9df77c6bc9c9703da9b5c018479d20262acc3df97d419"},
|
||||
{file = "aiohttp-3.12.14-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:a194ace7bc43ce765338ca2dfb5661489317db216ea7ea700b0332878b392cab"},
|
||||
{file = "aiohttp-3.12.14-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:16260e8e03744a6fe3fcb05259eeab8e08342c4c33decf96a9dad9f1187275d0"},
|
||||
{file = "aiohttp-3.12.14-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:8c779e5ebbf0e2e15334ea404fcce54009dc069210164a244d2eac8352a44b28"},
|
||||
{file = "aiohttp-3.12.14-cp311-cp311-win32.whl", hash = "sha256:a289f50bf1bd5be227376c067927f78079a7bdeccf8daa6a9e65c38bae14324b"},
|
||||
{file = "aiohttp-3.12.14-cp311-cp311-win_amd64.whl", hash = "sha256:0b8a69acaf06b17e9c54151a6c956339cf46db4ff72b3ac28516d0f7068f4ced"},
|
||||
{file = "aiohttp-3.12.14-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a0ecbb32fc3e69bc25efcda7d28d38e987d007096cbbeed04f14a6662d0eee22"},
|
||||
{file = "aiohttp-3.12.14-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0400f0ca9bb3e0b02f6466421f253797f6384e9845820c8b05e976398ac1d81a"},
|
||||
{file = "aiohttp-3.12.14-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a56809fed4c8a830b5cae18454b7464e1529dbf66f71c4772e3cfa9cbec0a1ff"},
|
||||
{file = "aiohttp-3.12.14-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27f2e373276e4755691a963e5d11756d093e346119f0627c2d6518208483fb6d"},
|
||||
{file = "aiohttp-3.12.14-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:ca39e433630e9a16281125ef57ece6817afd1d54c9f1bf32e901f38f16035869"},
|
||||
{file = "aiohttp-3.12.14-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9c748b3f8b14c77720132b2510a7d9907a03c20ba80f469e58d5dfd90c079a1c"},
|
||||
{file = "aiohttp-3.12.14-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f0a568abe1b15ce69d4cc37e23020720423f0728e3cb1f9bcd3f53420ec3bfe7"},
|
||||
{file = "aiohttp-3.12.14-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9888e60c2c54eaf56704b17feb558c7ed6b7439bca1e07d4818ab878f2083660"},
|
||||
{file = "aiohttp-3.12.14-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3006a1dc579b9156de01e7916d38c63dc1ea0679b14627a37edf6151bc530088"},
|
||||
{file = "aiohttp-3.12.14-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:aa8ec5c15ab80e5501a26719eb48a55f3c567da45c6ea5bb78c52c036b2655c7"},
|
||||
{file = "aiohttp-3.12.14-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:39b94e50959aa07844c7fe2206b9f75d63cc3ad1c648aaa755aa257f6f2498a9"},
|
||||
{file = "aiohttp-3.12.14-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:04c11907492f416dad9885d503fbfc5dcb6768d90cad8639a771922d584609d3"},
|
||||
{file = "aiohttp-3.12.14-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:88167bd9ab69bb46cee91bd9761db6dfd45b6e76a0438c7e884c3f8160ff21eb"},
|
||||
{file = "aiohttp-3.12.14-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:791504763f25e8f9f251e4688195e8b455f8820274320204f7eafc467e609425"},
|
||||
{file = "aiohttp-3.12.14-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2785b112346e435dd3a1a67f67713a3fe692d288542f1347ad255683f066d8e0"},
|
||||
{file = "aiohttp-3.12.14-cp312-cp312-win32.whl", hash = "sha256:15f5f4792c9c999a31d8decf444e79fcfd98497bf98e94284bf390a7bb8c1729"},
|
||||
{file = "aiohttp-3.12.14-cp312-cp312-win_amd64.whl", hash = "sha256:3b66e1a182879f579b105a80d5c4bd448b91a57e8933564bf41665064796a338"},
|
||||
{file = "aiohttp-3.12.14-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:3143a7893d94dc82bc409f7308bc10d60285a3cd831a68faf1aa0836c5c3c767"},
|
||||
{file = "aiohttp-3.12.14-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3d62ac3d506cef54b355bd34c2a7c230eb693880001dfcda0bf88b38f5d7af7e"},
|
||||
{file = "aiohttp-3.12.14-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:48e43e075c6a438937c4de48ec30fa8ad8e6dfef122a038847456bfe7b947b63"},
|
||||
{file = "aiohttp-3.12.14-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:077b4488411a9724cecc436cbc8c133e0d61e694995b8de51aaf351c7578949d"},
|
||||
{file = "aiohttp-3.12.14-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d8c35632575653f297dcbc9546305b2c1133391089ab925a6a3706dfa775ccab"},
|
||||
{file = "aiohttp-3.12.14-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6b8ce87963f0035c6834b28f061df90cf525ff7c9b6283a8ac23acee6502afd4"},
|
||||
{file = "aiohttp-3.12.14-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f0a2cf66e32a2563bb0766eb24eae7e9a269ac0dc48db0aae90b575dc9583026"},
|
||||
{file = "aiohttp-3.12.14-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdea089caf6d5cde975084a884c72d901e36ef9c2fd972c9f51efbbc64e96fbd"},
|
||||
{file = "aiohttp-3.12.14-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8a7865f27db67d49e81d463da64a59365ebd6b826e0e4847aa111056dcb9dc88"},
|
||||
{file = "aiohttp-3.12.14-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0ab5b38a6a39781d77713ad930cb5e7feea6f253de656a5f9f281a8f5931b086"},
|
||||
{file = "aiohttp-3.12.14-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:9b3b15acee5c17e8848d90a4ebc27853f37077ba6aec4d8cb4dbbea56d156933"},
|
||||
{file = "aiohttp-3.12.14-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e4c972b0bdaac167c1e53e16a16101b17c6d0ed7eac178e653a07b9f7fad7151"},
|
||||
{file = "aiohttp-3.12.14-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:7442488b0039257a3bdbc55f7209587911f143fca11df9869578db6c26feeeb8"},
|
||||
{file = "aiohttp-3.12.14-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:f68d3067eecb64c5e9bab4a26aa11bd676f4c70eea9ef6536b0a4e490639add3"},
|
||||
{file = "aiohttp-3.12.14-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f88d3704c8b3d598a08ad17d06006cb1ca52a1182291f04979e305c8be6c9758"},
|
||||
{file = "aiohttp-3.12.14-cp313-cp313-win32.whl", hash = "sha256:a3c99ab19c7bf375c4ae3debd91ca5d394b98b6089a03231d4c580ef3c2ae4c5"},
|
||||
{file = "aiohttp-3.12.14-cp313-cp313-win_amd64.whl", hash = "sha256:3f8aad695e12edc9d571f878c62bedc91adf30c760c8632f09663e5f564f4baa"},
|
||||
{file = "aiohttp-3.12.14-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b8cc6b05e94d837bcd71c6531e2344e1ff0fb87abe4ad78a9261d67ef5d83eae"},
|
||||
{file = "aiohttp-3.12.14-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d1dcb015ac6a3b8facd3677597edd5ff39d11d937456702f0bb2b762e390a21b"},
|
||||
{file = "aiohttp-3.12.14-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3779ed96105cd70ee5e85ca4f457adbce3d9ff33ec3d0ebcdf6c5727f26b21b3"},
|
||||
{file = "aiohttp-3.12.14-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:717a0680729b4ebd7569c1dcd718c46b09b360745fd8eb12317abc74b14d14d0"},
|
||||
{file = "aiohttp-3.12.14-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b5dd3a2ef7c7e968dbbac8f5574ebeac4d2b813b247e8cec28174a2ba3627170"},
|
||||
{file = "aiohttp-3.12.14-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4710f77598c0092239bc12c1fcc278a444e16c7032d91babf5abbf7166463f7b"},
|
||||
{file = "aiohttp-3.12.14-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f3e9f75ae842a6c22a195d4a127263dbf87cbab729829e0bd7857fb1672400b2"},
|
||||
{file = "aiohttp-3.12.14-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f9c8d55d6802086edd188e3a7d85a77787e50d56ce3eb4757a3205fa4657922"},
|
||||
{file = "aiohttp-3.12.14-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:79b29053ff3ad307880d94562cca80693c62062a098a5776ea8ef5ef4b28d140"},
|
||||
{file = "aiohttp-3.12.14-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:23e1332fff36bebd3183db0c7a547a1da9d3b4091509f6d818e098855f2f27d3"},
|
||||
{file = "aiohttp-3.12.14-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:a564188ce831fd110ea76bcc97085dd6c625b427db3f1dbb14ca4baa1447dcbc"},
|
||||
{file = "aiohttp-3.12.14-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:a7a1b4302f70bb3ec40ca86de82def532c97a80db49cac6a6700af0de41af5ee"},
|
||||
{file = "aiohttp-3.12.14-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:1b07ccef62950a2519f9bfc1e5b294de5dd84329f444ca0b329605ea787a3de5"},
|
||||
{file = "aiohttp-3.12.14-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:938bd3ca6259e7e48b38d84f753d548bd863e0c222ed6ee6ace3fd6752768a84"},
|
||||
{file = "aiohttp-3.12.14-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:8bc784302b6b9f163b54c4e93d7a6f09563bd01ff2b841b29ed3ac126e5040bf"},
|
||||
{file = "aiohttp-3.12.14-cp39-cp39-win32.whl", hash = "sha256:a3416f95961dd7d5393ecff99e3f41dc990fb72eda86c11f2a60308ac6dcd7a0"},
|
||||
{file = "aiohttp-3.12.14-cp39-cp39-win_amd64.whl", hash = "sha256:196858b8820d7f60578f8b47e5669b3195c21d8ab261e39b1d705346458f445f"},
|
||||
{file = "aiohttp-3.12.14.tar.gz", hash = "sha256:6e06e120e34d93100de448fd941522e11dafa78ef1a893c179901b7d66aa29f2"},
|
||||
{file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5077b1a5f40ffa3ba1f40d537d3bec4383988ee51fbba6b74aa8fb1bc466599e"},
|
||||
{file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8d6a14a4d93b5b3c2891fca94fa9d41b2322a68194422bef0dd5ec1e57d7d298"},
|
||||
{file = "aiohttp-3.10.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ffbfde2443696345e23a3c597049b1dd43049bb65337837574205e7368472177"},
|
||||
{file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20b3d9e416774d41813bc02fdc0663379c01817b0874b932b81c7f777f67b217"},
|
||||
{file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b943011b45ee6bf74b22245c6faab736363678e910504dd7531a58c76c9015a"},
|
||||
{file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48bc1d924490f0d0b3658fe5c4b081a4d56ebb58af80a6729d4bd13ea569797a"},
|
||||
{file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e12eb3f4b1f72aaaf6acd27d045753b18101524f72ae071ae1c91c1cd44ef115"},
|
||||
{file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f14ebc419a568c2eff3c1ed35f634435c24ead2fe19c07426af41e7adb68713a"},
|
||||
{file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:72b191cdf35a518bfc7ca87d770d30941decc5aaf897ec8b484eb5cc8c7706f3"},
|
||||
{file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5ab2328a61fdc86424ee540d0aeb8b73bbcad7351fb7cf7a6546fc0bcffa0038"},
|
||||
{file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:aa93063d4af05c49276cf14e419550a3f45258b6b9d1f16403e777f1addf4519"},
|
||||
{file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:30283f9d0ce420363c24c5c2421e71a738a2155f10adbb1a11a4d4d6d2715cfc"},
|
||||
{file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e5358addc8044ee49143c546d2182c15b4ac3a60be01c3209374ace05af5733d"},
|
||||
{file = "aiohttp-3.10.11-cp310-cp310-win32.whl", hash = "sha256:e1ffa713d3ea7cdcd4aea9cddccab41edf6882fa9552940344c44e59652e1120"},
|
||||
{file = "aiohttp-3.10.11-cp310-cp310-win_amd64.whl", hash = "sha256:778cbd01f18ff78b5dd23c77eb82987ee4ba23408cbed233009fd570dda7e674"},
|
||||
{file = "aiohttp-3.10.11-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:80ff08556c7f59a7972b1e8919f62e9c069c33566a6d28586771711e0eea4f07"},
|
||||
{file = "aiohttp-3.10.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c8f96e9ee19f04c4914e4e7a42a60861066d3e1abf05c726f38d9d0a466e695"},
|
||||
{file = "aiohttp-3.10.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fb8601394d537da9221947b5d6e62b064c9a43e88a1ecd7414d21a1a6fba9c24"},
|
||||
{file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ea224cf7bc2d8856d6971cea73b1d50c9c51d36971faf1abc169a0d5f85a382"},
|
||||
{file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db9503f79e12d5d80b3efd4d01312853565c05367493379df76d2674af881caa"},
|
||||
{file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0f449a50cc33f0384f633894d8d3cd020e3ccef81879c6e6245c3c375c448625"},
|
||||
{file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82052be3e6d9e0c123499127782a01a2b224b8af8c62ab46b3f6197035ad94e9"},
|
||||
{file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:20063c7acf1eec550c8eb098deb5ed9e1bb0521613b03bb93644b810986027ac"},
|
||||
{file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:489cced07a4c11488f47aab1f00d0c572506883f877af100a38f1fedaa884c3a"},
|
||||
{file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ea9b3bab329aeaa603ed3bf605f1e2a6f36496ad7e0e1aa42025f368ee2dc07b"},
|
||||
{file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:ca117819d8ad113413016cb29774b3f6d99ad23c220069789fc050267b786c16"},
|
||||
{file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:2dfb612dcbe70fb7cdcf3499e8d483079b89749c857a8f6e80263b021745c730"},
|
||||
{file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f9b615d3da0d60e7d53c62e22b4fd1c70f4ae5993a44687b011ea3a2e49051b8"},
|
||||
{file = "aiohttp-3.10.11-cp311-cp311-win32.whl", hash = "sha256:29103f9099b6068bbdf44d6a3d090e0a0b2be6d3c9f16a070dd9d0d910ec08f9"},
|
||||
{file = "aiohttp-3.10.11-cp311-cp311-win_amd64.whl", hash = "sha256:236b28ceb79532da85d59aa9b9bf873b364e27a0acb2ceaba475dc61cffb6f3f"},
|
||||
{file = "aiohttp-3.10.11-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:7480519f70e32bfb101d71fb9a1f330fbd291655a4c1c922232a48c458c52710"},
|
||||
{file = "aiohttp-3.10.11-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f65267266c9aeb2287a6622ee2bb39490292552f9fbf851baabc04c9f84e048d"},
|
||||
{file = "aiohttp-3.10.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7400a93d629a0608dc1d6c55f1e3d6e07f7375745aaa8bd7f085571e4d1cee97"},
|
||||
{file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f34b97e4b11b8d4eb2c3a4f975be626cc8af99ff479da7de49ac2c6d02d35725"},
|
||||
{file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e7b825da878464a252ccff2958838f9caa82f32a8dbc334eb9b34a026e2c636"},
|
||||
{file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9f92a344c50b9667827da308473005f34767b6a2a60d9acff56ae94f895f385"},
|
||||
{file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc6f1ab987a27b83c5268a17218463c2ec08dbb754195113867a27b166cd6087"},
|
||||
{file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1dc0f4ca54842173d03322793ebcf2c8cc2d34ae91cc762478e295d8e361e03f"},
|
||||
{file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7ce6a51469bfaacff146e59e7fb61c9c23006495d11cc24c514a455032bcfa03"},
|
||||
{file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:aad3cd91d484d065ede16f3cf15408254e2469e3f613b241a1db552c5eb7ab7d"},
|
||||
{file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f4df4b8ca97f658c880fb4b90b1d1ec528315d4030af1ec763247ebfd33d8b9a"},
|
||||
{file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:2e4e18a0a2d03531edbc06c366954e40a3f8d2a88d2b936bbe78a0c75a3aab3e"},
|
||||
{file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6ce66780fa1a20e45bc753cda2a149daa6dbf1561fc1289fa0c308391c7bc0a4"},
|
||||
{file = "aiohttp-3.10.11-cp312-cp312-win32.whl", hash = "sha256:a919c8957695ea4c0e7a3e8d16494e3477b86f33067478f43106921c2fef15bb"},
|
||||
{file = "aiohttp-3.10.11-cp312-cp312-win_amd64.whl", hash = "sha256:b5e29706e6389a2283a91611c91bf24f218962717c8f3b4e528ef529d112ee27"},
|
||||
{file = "aiohttp-3.10.11-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:703938e22434d7d14ec22f9f310559331f455018389222eed132808cd8f44127"},
|
||||
{file = "aiohttp-3.10.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9bc50b63648840854e00084c2b43035a62e033cb9b06d8c22b409d56eb098413"},
|
||||
{file = "aiohttp-3.10.11-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5f0463bf8b0754bc744e1feb61590706823795041e63edf30118a6f0bf577461"},
|
||||
{file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6c6dec398ac5a87cb3a407b068e1106b20ef001c344e34154616183fe684288"},
|
||||
{file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bcaf2d79104d53d4dcf934f7ce76d3d155302d07dae24dff6c9fffd217568067"},
|
||||
{file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:25fd5470922091b5a9aeeb7e75be609e16b4fba81cdeaf12981393fb240dd10e"},
|
||||
{file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbde2ca67230923a42161b1f408c3992ae6e0be782dca0c44cb3206bf330dee1"},
|
||||
{file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:249c8ff8d26a8b41a0f12f9df804e7c685ca35a207e2410adbd3e924217b9006"},
|
||||
{file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:878ca6a931ee8c486a8f7b432b65431d095c522cbeb34892bee5be97b3481d0f"},
|
||||
{file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:8663f7777ce775f0413324be0d96d9730959b2ca73d9b7e2c2c90539139cbdd6"},
|
||||
{file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:6cd3f10b01f0c31481fba8d302b61603a2acb37b9d30e1d14e0f5a58b7b18a31"},
|
||||
{file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:4e8d8aad9402d3aa02fdc5ca2fe68bcb9fdfe1f77b40b10410a94c7f408b664d"},
|
||||
{file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:38e3c4f80196b4f6c3a85d134a534a56f52da9cb8d8e7af1b79a32eefee73a00"},
|
||||
{file = "aiohttp-3.10.11-cp313-cp313-win32.whl", hash = "sha256:fc31820cfc3b2863c6e95e14fcf815dc7afe52480b4dc03393c4873bb5599f71"},
|
||||
{file = "aiohttp-3.10.11-cp313-cp313-win_amd64.whl", hash = "sha256:4996ff1345704ffdd6d75fb06ed175938c133425af616142e7187f28dc75f14e"},
|
||||
{file = "aiohttp-3.10.11-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:74baf1a7d948b3d640badeac333af581a367ab916b37e44cf90a0334157cdfd2"},
|
||||
{file = "aiohttp-3.10.11-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:473aebc3b871646e1940c05268d451f2543a1d209f47035b594b9d4e91ce8339"},
|
||||
{file = "aiohttp-3.10.11-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c2f746a6968c54ab2186574e15c3f14f3e7f67aef12b761e043b33b89c5b5f95"},
|
||||
{file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d110cabad8360ffa0dec8f6ec60e43286e9d251e77db4763a87dcfe55b4adb92"},
|
||||
{file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0099c7d5d7afff4202a0c670e5b723f7718810000b4abcbc96b064129e64bc7"},
|
||||
{file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0316e624b754dbbf8c872b62fe6dcb395ef20c70e59890dfa0de9eafccd2849d"},
|
||||
{file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a5f7ab8baf13314e6b2485965cbacb94afff1e93466ac4d06a47a81c50f9cca"},
|
||||
{file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c891011e76041e6508cbfc469dd1a8ea09bc24e87e4c204e05f150c4c455a5fa"},
|
||||
{file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:9208299251370ee815473270c52cd3f7069ee9ed348d941d574d1457d2c73e8b"},
|
||||
{file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:459f0f32c8356e8125f45eeff0ecf2b1cb6db1551304972702f34cd9e6c44658"},
|
||||
{file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:14cdc8c1810bbd4b4b9f142eeee23cda528ae4e57ea0923551a9af4820980e39"},
|
||||
{file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:971aa438a29701d4b34e4943e91b5e984c3ae6ccbf80dd9efaffb01bd0b243a9"},
|
||||
{file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:9a309c5de392dfe0f32ee57fa43ed8fc6ddf9985425e84bd51ed66bb16bce3a7"},
|
||||
{file = "aiohttp-3.10.11-cp38-cp38-win32.whl", hash = "sha256:9ec1628180241d906a0840b38f162a3215114b14541f1a8711c368a8739a9be4"},
|
||||
{file = "aiohttp-3.10.11-cp38-cp38-win_amd64.whl", hash = "sha256:9c6e0ffd52c929f985c7258f83185d17c76d4275ad22e90aa29f38e211aacbec"},
|
||||
{file = "aiohttp-3.10.11-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cdc493a2e5d8dc79b2df5bec9558425bcd39aff59fc949810cbd0832e294b106"},
|
||||
{file = "aiohttp-3.10.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b3e70f24e7d0405be2348da9d5a7836936bf3a9b4fd210f8c37e8d48bc32eca6"},
|
||||
{file = "aiohttp-3.10.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:968b8fb2a5eee2770eda9c7b5581587ef9b96fbdf8dcabc6b446d35ccc69df01"},
|
||||
{file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:deef4362af9493d1382ef86732ee2e4cbc0d7c005947bd54ad1a9a16dd59298e"},
|
||||
{file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:686b03196976e327412a1b094f4120778c7c4b9cff9bce8d2fdfeca386b89829"},
|
||||
{file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3bf6d027d9d1d34e1c2e1645f18a6498c98d634f8e373395221121f1c258ace8"},
|
||||
{file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:099fd126bf960f96d34a760e747a629c27fb3634da5d05c7ef4d35ef4ea519fc"},
|
||||
{file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c73c4d3dae0b4644bc21e3de546530531d6cdc88659cdeb6579cd627d3c206aa"},
|
||||
{file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0c5580f3c51eea91559db3facd45d72e7ec970b04528b4709b1f9c2555bd6d0b"},
|
||||
{file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:fdf6429f0caabfd8a30c4e2eaecb547b3c340e4730ebfe25139779b9815ba138"},
|
||||
{file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:d97187de3c276263db3564bb9d9fad9e15b51ea10a371ffa5947a5ba93ad6777"},
|
||||
{file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:0acafb350cfb2eba70eb5d271f55e08bd4502ec35e964e18ad3e7d34d71f7261"},
|
||||
{file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c13ed0c779911c7998a58e7848954bd4d63df3e3575f591e321b19a2aec8df9f"},
|
||||
{file = "aiohttp-3.10.11-cp39-cp39-win32.whl", hash = "sha256:22b7c540c55909140f63ab4f54ec2c20d2635c0289cdd8006da46f3327f971b9"},
|
||||
{file = "aiohttp-3.10.11-cp39-cp39-win_amd64.whl", hash = "sha256:7b26b1551e481012575dab8e3727b16fe7dd27eb2711d2e63ced7368756268fb"},
|
||||
{file = "aiohttp-3.10.11.tar.gz", hash = "sha256:9dc2b8f3dcab2e39e0fa309c8da50c3b55e6f34ab25f1a71d3288f24924d33a7"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
aiohappyeyeballs = ">=2.5.0"
|
||||
aiosignal = ">=1.4.0"
|
||||
aiohappyeyeballs = ">=2.3.0"
|
||||
aiosignal = ">=1.1.2"
|
||||
attrs = ">=17.3.0"
|
||||
frozenlist = ">=1.1.1"
|
||||
multidict = ">=4.5,<7.0"
|
||||
propcache = ">=0.2.0"
|
||||
yarl = ">=1.17.0,<2.0"
|
||||
yarl = ">=1.12.0,<2.0"
|
||||
|
||||
[package.extras]
|
||||
speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns (>=3.3.0)", "brotlicffi ; platform_python_implementation != \"CPython\""]
|
||||
speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns (>=3.2.0) ; sys_platform == \"linux\" or sys_platform == \"darwin\"", "brotlicffi ; platform_python_implementation != \"CPython\""]
|
||||
|
||||
[[package]]
|
||||
name = "aiopg"
|
||||
@@ -141,19 +145,18 @@ sa = ["sqlalchemy[postgresql-psycopg2binary] (>=1.3,<1.5)"]
|
||||
|
||||
[[package]]
|
||||
name = "aiosignal"
|
||||
version = "1.4.0"
|
||||
version = "1.3.1"
|
||||
description = "aiosignal: a list of registered asynchronous callbacks"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e"},
|
||||
{file = "aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7"},
|
||||
{file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"},
|
||||
{file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
frozenlist = ">=1.1.0"
|
||||
typing-extensions = {version = ">=4.2", markers = "python_version < \"3.13\""}
|
||||
|
||||
[[package]]
|
||||
name = "allure-pytest"
|
||||
@@ -3844,4 +3847,4 @@ cffi = ["cffi (>=1.11)"]
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = "^3.11"
|
||||
content-hash = "6a1e8ba06b8194bf28d87fd5e184e2ddc2b4a19dffcbe3953b26da3d55c9212f"
|
||||
content-hash = "bd93313f110110aa53b24a3ed47ba2d7f60e2c658a79cdff7320fed1bb1b57b5"
|
||||
|
||||
@@ -16,7 +16,6 @@ async-compression.workspace = true
|
||||
async-trait.workspace = true
|
||||
atomic-take.workspace = true
|
||||
aws-config.workspace = true
|
||||
aws-credential-types.workspace = true
|
||||
aws-sdk-iam.workspace = true
|
||||
aws-sigv4.workspace = true
|
||||
base64.workspace = true
|
||||
@@ -49,7 +48,6 @@ indexmap = { workspace = true, features = ["serde"] }
|
||||
ipnet.workspace = true
|
||||
itertools.workspace = true
|
||||
itoa.workspace = true
|
||||
json = { path = "../libs/proxy/json" }
|
||||
lasso = { workspace = true, features = ["multi-threaded"] }
|
||||
measured = { workspace = true, features = ["lasso"] }
|
||||
metrics.workspace = true
|
||||
@@ -129,4 +127,4 @@ rstest.workspace = true
|
||||
walkdir.workspace = true
|
||||
rand_distr = "0.4"
|
||||
tokio-postgres.workspace = true
|
||||
tracing-test = "0.2"
|
||||
tracing-test = "0.2"
|
||||
@@ -123,11 +123,6 @@ docker exec -it proxy-postgres psql -U postgres -c "CREATE TABLE neon_control_pl
|
||||
docker exec -it proxy-postgres psql -U postgres -c "CREATE ROLE proxy WITH SUPERUSER LOGIN PASSWORD 'password';"
|
||||
```
|
||||
|
||||
If you want to test query cancellation, redis is also required:
|
||||
```sh
|
||||
docker run --detach --name proxy-redis --publish 6379:6379 redis:7.0
|
||||
```
|
||||
|
||||
Let's create self-signed certificate by running:
|
||||
```sh
|
||||
openssl req -new -x509 -days 365 -nodes -text -out server.crt -keyout server.key -subj "/CN=*.local.neon.build"
|
||||
@@ -135,10 +130,7 @@ openssl req -new -x509 -days 365 -nodes -text -out server.crt -keyout server.key
|
||||
|
||||
Then we need to build proxy with 'testing' feature and run, e.g.:
|
||||
```sh
|
||||
RUST_LOG=proxy LOGFMT=text cargo run -p proxy --bin proxy --features testing -- \
|
||||
--auth-backend postgres --auth-endpoint 'postgresql://postgres:proxy-postgres@127.0.0.1:5432/postgres' \
|
||||
--redis-auth-type="plain" --redis-plain="redis://127.0.0.1:6379" \
|
||||
-c server.crt -k server.key
|
||||
RUST_LOG=proxy LOGFMT=text cargo run -p proxy --bin proxy --features testing -- --auth-backend postgres --auth-endpoint 'postgresql://postgres:proxy-postgres@127.0.0.1:5432/postgres' -c server.crt -k server.key
|
||||
```
|
||||
|
||||
Now from client you can start a new session:
|
||||
|
||||
@@ -7,17 +7,13 @@ use std::pin::pin;
|
||||
use std::sync::Mutex;
|
||||
|
||||
use scopeguard::ScopeGuard;
|
||||
use tokio::sync::oneshot;
|
||||
use tokio::sync::oneshot::error::TryRecvError;
|
||||
|
||||
use crate::ext::LockExt;
|
||||
|
||||
type ProcResult<P> = Result<<P as QueueProcessing>::Res, <P as QueueProcessing>::Err>;
|
||||
|
||||
pub trait QueueProcessing: Send + 'static {
|
||||
type Req: Send + 'static;
|
||||
type Res: Send;
|
||||
type Err: Send + Clone;
|
||||
|
||||
/// Get the desired batch size.
|
||||
fn batch_size(&self, queue_size: usize) -> usize;
|
||||
@@ -28,18 +24,7 @@ pub trait QueueProcessing: Send + 'static {
|
||||
/// If this apply can error, it's expected that errors be forwarded to each Self::Res.
|
||||
///
|
||||
/// Batching does not need to happen atomically.
|
||||
fn apply(
|
||||
&mut self,
|
||||
req: Vec<Self::Req>,
|
||||
) -> impl Future<Output = Result<Vec<Self::Res>, Self::Err>> + Send;
|
||||
}
|
||||
|
||||
#[derive(thiserror::Error)]
|
||||
pub enum BatchQueueError<E: Clone, C> {
|
||||
#[error(transparent)]
|
||||
Result(E),
|
||||
#[error(transparent)]
|
||||
Cancelled(C),
|
||||
fn apply(&mut self, req: Vec<Self::Req>) -> impl Future<Output = Vec<Self::Res>> + Send;
|
||||
}
|
||||
|
||||
pub struct BatchQueue<P: QueueProcessing> {
|
||||
@@ -49,7 +34,7 @@ pub struct BatchQueue<P: QueueProcessing> {
|
||||
|
||||
struct BatchJob<P: QueueProcessing> {
|
||||
req: P::Req,
|
||||
res: tokio::sync::oneshot::Sender<Result<P::Res, P::Err>>,
|
||||
res: tokio::sync::oneshot::Sender<P::Res>,
|
||||
}
|
||||
|
||||
impl<P: QueueProcessing> BatchQueue<P> {
|
||||
@@ -70,11 +55,11 @@ impl<P: QueueProcessing> BatchQueue<P> {
|
||||
&self,
|
||||
req: P::Req,
|
||||
cancelled: impl Future<Output = R>,
|
||||
) -> Result<P::Res, BatchQueueError<P::Err, R>> {
|
||||
) -> Result<P::Res, R> {
|
||||
let (id, mut rx) = self.inner.lock_propagate_poison().register_job(req);
|
||||
|
||||
let mut cancelled = pin!(cancelled);
|
||||
let resp: Option<Result<P::Res, P::Err>> = loop {
|
||||
let resp = loop {
|
||||
// try become the leader, or try wait for success.
|
||||
let mut processor = tokio::select! {
|
||||
// try become leader.
|
||||
@@ -87,7 +72,7 @@ impl<P: QueueProcessing> BatchQueue<P> {
|
||||
if inner.queue.remove(&id).is_some() {
|
||||
tracing::warn!("batched task cancelled before completion");
|
||||
}
|
||||
return Err(BatchQueueError::Cancelled(cancel));
|
||||
return Err(cancel);
|
||||
},
|
||||
};
|
||||
|
||||
@@ -111,30 +96,18 @@ impl<P: QueueProcessing> BatchQueue<P> {
|
||||
// good: we didn't get cancelled.
|
||||
ScopeGuard::into_inner(cancel_safety);
|
||||
|
||||
match values {
|
||||
Ok(values) => {
|
||||
if values.len() != resps.len() {
|
||||
tracing::error!(
|
||||
"batch: invalid response size, expected={}, got={}",
|
||||
resps.len(),
|
||||
values.len()
|
||||
);
|
||||
}
|
||||
if values.len() != resps.len() {
|
||||
tracing::error!(
|
||||
"batch: invalid response size, expected={}, got={}",
|
||||
resps.len(),
|
||||
values.len()
|
||||
);
|
||||
}
|
||||
|
||||
// send response values.
|
||||
for (tx, value) in std::iter::zip(resps, values) {
|
||||
if tx.send(Ok(value)).is_err() {
|
||||
// receiver hung up but that's fine.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Err(err) => {
|
||||
for tx in resps {
|
||||
if tx.send(Err(err.clone())).is_err() {
|
||||
// receiver hung up but that's fine.
|
||||
}
|
||||
}
|
||||
// send response values.
|
||||
for (tx, value) in std::iter::zip(resps, values) {
|
||||
if tx.send(value).is_err() {
|
||||
// receiver hung up but that's fine.
|
||||
}
|
||||
}
|
||||
|
||||
@@ -156,8 +129,7 @@ impl<P: QueueProcessing> BatchQueue<P> {
|
||||
|
||||
tracing::debug!(id, "batch: job completed");
|
||||
|
||||
resp.expect("no response found. batch processer should not panic")
|
||||
.map_err(BatchQueueError::Result)
|
||||
Ok(resp.expect("no response found. batch processer should not panic"))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -167,8 +139,8 @@ struct BatchQueueInner<P: QueueProcessing> {
|
||||
}
|
||||
|
||||
impl<P: QueueProcessing> BatchQueueInner<P> {
|
||||
fn register_job(&mut self, req: P::Req) -> (u64, oneshot::Receiver<ProcResult<P>>) {
|
||||
let (tx, rx) = oneshot::channel();
|
||||
fn register_job(&mut self, req: P::Req) -> (u64, tokio::sync::oneshot::Receiver<P::Res>) {
|
||||
let (tx, rx) = tokio::sync::oneshot::channel();
|
||||
|
||||
let id = self.version;
|
||||
|
||||
@@ -186,7 +158,7 @@ impl<P: QueueProcessing> BatchQueueInner<P> {
|
||||
(id, rx)
|
||||
}
|
||||
|
||||
fn get_batch(&mut self, p: &P) -> (Vec<P::Req>, Vec<oneshot::Sender<ProcResult<P>>>) {
|
||||
fn get_batch(&mut self, p: &P) -> (Vec<P::Req>, Vec<tokio::sync::oneshot::Sender<P::Res>>) {
|
||||
let batch_size = p.batch_size(self.queue.len());
|
||||
let mut reqs = Vec::with_capacity(batch_size);
|
||||
let mut resps = Vec::with_capacity(batch_size);
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user