mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-18 21:50:37 +00:00
Compare commits
42 Commits
release-pr
...
skyzh/fix-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1a769aad4b | ||
|
|
ad0c5fdae7 | ||
|
|
2b041964b3 | ||
|
|
d4c059a884 | ||
|
|
2c56c46d48 | ||
|
|
d1728a6bcd | ||
|
|
0a27973584 | ||
|
|
07c2411f6b | ||
|
|
5819938c93 | ||
|
|
b7548de814 | ||
|
|
9794f386f4 | ||
|
|
79083de61c | ||
|
|
ec9079f483 | ||
|
|
b9b25e13a0 | ||
|
|
cf2e695f49 | ||
|
|
fc233794f6 | ||
|
|
c002236145 | ||
|
|
4af0b9b387 | ||
|
|
0e00faf528 | ||
|
|
7747a9619f | ||
|
|
46100717ad | ||
|
|
00eeff9b8d | ||
|
|
2a46426157 | ||
|
|
edc11253b6 | ||
|
|
b4e26a6284 | ||
|
|
96b46365e4 | ||
|
|
aa19f10e7e | ||
|
|
35170656fe | ||
|
|
cd9ad75797 | ||
|
|
eadb05f78e | ||
|
|
c5115518e9 | ||
|
|
931f8c4300 | ||
|
|
0f7c2cc382 | ||
|
|
983d56502b | ||
|
|
bcef542d5b | ||
|
|
e31455d936 | ||
|
|
a4ea7d6194 | ||
|
|
19bea5fd0c | ||
|
|
5be94e28c4 | ||
|
|
63a106021a | ||
|
|
9a6ace9bde | ||
|
|
8c77ccfc01 |
1
.github/actionlint.yml
vendored
1
.github/actionlint.yml
vendored
@@ -6,6 +6,7 @@ self-hosted-runner:
|
|||||||
- small
|
- small
|
||||||
- small-metal
|
- small-metal
|
||||||
- small-arm64
|
- small-arm64
|
||||||
|
- unit-perf
|
||||||
- us-east-2
|
- us-east-2
|
||||||
config-variables:
|
config-variables:
|
||||||
- AWS_ECR_REGION
|
- AWS_ECR_REGION
|
||||||
|
|||||||
@@ -70,6 +70,7 @@ runs:
|
|||||||
|
|
||||||
- name: Install Allure
|
- name: Install Allure
|
||||||
shell: bash -euxo pipefail {0}
|
shell: bash -euxo pipefail {0}
|
||||||
|
working-directory: /tmp
|
||||||
run: |
|
run: |
|
||||||
if ! which allure; then
|
if ! which allure; then
|
||||||
ALLURE_ZIP=allure-${ALLURE_VERSION}.zip
|
ALLURE_ZIP=allure-${ALLURE_VERSION}.zip
|
||||||
|
|||||||
@@ -113,8 +113,6 @@ runs:
|
|||||||
TEST_OUTPUT: /tmp/test_output
|
TEST_OUTPUT: /tmp/test_output
|
||||||
BUILD_TYPE: ${{ inputs.build_type }}
|
BUILD_TYPE: ${{ inputs.build_type }}
|
||||||
COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}
|
COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}
|
||||||
ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'backward compatibility breakage')
|
|
||||||
ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage')
|
|
||||||
RERUN_FAILED: ${{ inputs.rerun_failed }}
|
RERUN_FAILED: ${{ inputs.rerun_failed }}
|
||||||
PG_VERSION: ${{ inputs.pg_version }}
|
PG_VERSION: ${{ inputs.pg_version }}
|
||||||
SANITIZERS: ${{ inputs.sanitizers }}
|
SANITIZERS: ${{ inputs.sanitizers }}
|
||||||
|
|||||||
12
.github/workflows/_build-and-test-locally.yml
vendored
12
.github/workflows/_build-and-test-locally.yml
vendored
@@ -272,10 +272,13 @@ jobs:
|
|||||||
# run pageserver tests with different settings
|
# run pageserver tests with different settings
|
||||||
for get_vectored_concurrent_io in sequential sidecar-task; do
|
for get_vectored_concurrent_io in sequential sidecar-task; do
|
||||||
for io_engine in std-fs tokio-epoll-uring ; do
|
for io_engine in std-fs tokio-epoll-uring ; do
|
||||||
NEON_PAGESERVER_UNIT_TEST_GET_VECTORED_CONCURRENT_IO=$get_vectored_concurrent_io \
|
for io_mode in buffered direct direct-rw ; do
|
||||||
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine \
|
NEON_PAGESERVER_UNIT_TEST_GET_VECTORED_CONCURRENT_IO=$get_vectored_concurrent_io \
|
||||||
${cov_prefix} \
|
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine \
|
||||||
cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
|
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOMODE=$io_mode \
|
||||||
|
${cov_prefix} \
|
||||||
|
cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
|
||||||
|
done
|
||||||
done
|
done
|
||||||
done
|
done
|
||||||
|
|
||||||
@@ -392,6 +395,7 @@ jobs:
|
|||||||
BUILD_TAG: ${{ inputs.build-tag }}
|
BUILD_TAG: ${{ inputs.build-tag }}
|
||||||
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
|
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
|
||||||
PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
|
PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
|
||||||
|
PAGESERVER_VIRTUAL_FILE_IO_MODE: direct
|
||||||
USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}
|
USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}
|
||||||
|
|
||||||
# Temporary disable this step until we figure out why it's so flaky
|
# Temporary disable this step until we figure out why it's so flaky
|
||||||
|
|||||||
11
.github/workflows/_create-release-pr.yml
vendored
11
.github/workflows/_create-release-pr.yml
vendored
@@ -53,10 +53,13 @@ jobs:
|
|||||||
|| inputs.component-name == 'Compute' && 'release-compute'
|
|| inputs.component-name == 'Compute' && 'release-compute'
|
||||||
}}
|
}}
|
||||||
run: |
|
run: |
|
||||||
today=$(date +'%Y-%m-%d')
|
now_date=$(date -u +'%Y-%m-%d')
|
||||||
echo "title=${COMPONENT_NAME} release ${today}" | tee -a ${GITHUB_OUTPUT}
|
now_time=$(date -u +'%H-%M-%Z')
|
||||||
echo "rc-branch=rc/${RELEASE_BRANCH}/${today}" | tee -a ${GITHUB_OUTPUT}
|
{
|
||||||
echo "release-branch=${RELEASE_BRANCH}" | tee -a ${GITHUB_OUTPUT}
|
echo "title=${COMPONENT_NAME} release ${now_date}"
|
||||||
|
echo "rc-branch=rc/${RELEASE_BRANCH}/${now_date}_${now_time}"
|
||||||
|
echo "release-branch=${RELEASE_BRANCH}"
|
||||||
|
} | tee -a ${GITHUB_OUTPUT}
|
||||||
|
|
||||||
- name: Configure git
|
- name: Configure git
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
6
.github/workflows/build_and_test.yml
vendored
6
.github/workflows/build_and_test.yml
vendored
@@ -284,7 +284,7 @@ jobs:
|
|||||||
statuses: write
|
statuses: write
|
||||||
contents: write
|
contents: write
|
||||||
pull-requests: write
|
pull-requests: write
|
||||||
runs-on: [ self-hosted, small-metal ]
|
runs-on: [ self-hosted, unit-perf ]
|
||||||
container:
|
container:
|
||||||
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||||
credentials:
|
credentials:
|
||||||
@@ -323,6 +323,8 @@ jobs:
|
|||||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||||
TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
|
TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
|
||||||
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
|
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
|
||||||
|
PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
|
||||||
|
PAGESERVER_VIRTUAL_FILE_IO_MODE: direct
|
||||||
SYNC_BETWEEN_TESTS: true
|
SYNC_BETWEEN_TESTS: true
|
||||||
# XXX: no coverage data handling here, since benchmarks are run on release builds,
|
# XXX: no coverage data handling here, since benchmarks are run on release builds,
|
||||||
# while coverage is currently collected for the debug ones
|
# while coverage is currently collected for the debug ones
|
||||||
@@ -1271,7 +1273,7 @@ jobs:
|
|||||||
exit 1
|
exit 1
|
||||||
|
|
||||||
deploy:
|
deploy:
|
||||||
needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, push-neon-image-prod, push-compute-image-prod, meta, build-and-test-locally, trigger-custom-extensions-build-and-wait ]
|
needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, push-neon-image-prod, push-compute-image-prod, meta, trigger-custom-extensions-build-and-wait ]
|
||||||
# `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-neon-image-prod` and `push-compute-image-prod`
|
# `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-neon-image-prod` and `push-compute-image-prod`
|
||||||
if: ${{ contains(fromJSON('["push-main", "storage-release", "proxy-release", "compute-release"]'), needs.meta.outputs.run-kind) && !failure() && !cancelled() }}
|
if: ${{ contains(fromJSON('["push-main", "storage-release", "proxy-release", "compute-release"]'), needs.meta.outputs.run-kind) && !failure() && !cancelled() }}
|
||||||
permissions:
|
permissions:
|
||||||
|
|||||||
4
.github/workflows/pg-clients.yml
vendored
4
.github/workflows/pg-clients.yml
vendored
@@ -30,7 +30,7 @@ permissions:
|
|||||||
statuses: write # require for posting a status update
|
statuses: write # require for posting a status update
|
||||||
|
|
||||||
env:
|
env:
|
||||||
DEFAULT_PG_VERSION: 16
|
DEFAULT_PG_VERSION: 17
|
||||||
PLATFORM: neon-captest-new
|
PLATFORM: neon-captest-new
|
||||||
AWS_DEFAULT_REGION: eu-central-1
|
AWS_DEFAULT_REGION: eu-central-1
|
||||||
|
|
||||||
@@ -42,6 +42,8 @@ jobs:
|
|||||||
github-event-name: ${{ github.event_name }}
|
github-event-name: ${{ github.event_name }}
|
||||||
|
|
||||||
build-build-tools-image:
|
build-build-tools-image:
|
||||||
|
permissions:
|
||||||
|
packages: write
|
||||||
needs: [ check-permissions ]
|
needs: [ check-permissions ]
|
||||||
uses: ./.github/workflows/build-build-tools-image.yml
|
uses: ./.github/workflows/build-build-tools-image.yml
|
||||||
secrets: inherit
|
secrets: inherit
|
||||||
|
|||||||
14
Cargo.lock
generated
14
Cargo.lock
generated
@@ -1416,6 +1416,7 @@ name = "control_plane"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
|
"base64 0.13.1",
|
||||||
"camino",
|
"camino",
|
||||||
"clap",
|
"clap",
|
||||||
"comfy-table",
|
"comfy-table",
|
||||||
@@ -1425,10 +1426,12 @@ dependencies = [
|
|||||||
"humantime",
|
"humantime",
|
||||||
"humantime-serde",
|
"humantime-serde",
|
||||||
"hyper 0.14.30",
|
"hyper 0.14.30",
|
||||||
|
"jsonwebtoken",
|
||||||
"nix 0.27.1",
|
"nix 0.27.1",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"pageserver_api",
|
"pageserver_api",
|
||||||
"pageserver_client",
|
"pageserver_client",
|
||||||
|
"pem",
|
||||||
"postgres_backend",
|
"postgres_backend",
|
||||||
"postgres_connection",
|
"postgres_connection",
|
||||||
"regex",
|
"regex",
|
||||||
@@ -1437,6 +1440,8 @@ dependencies = [
|
|||||||
"scopeguard",
|
"scopeguard",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
|
"sha2",
|
||||||
|
"spki 0.7.3",
|
||||||
"storage_broker",
|
"storage_broker",
|
||||||
"thiserror 1.0.69",
|
"thiserror 1.0.69",
|
||||||
"tokio",
|
"tokio",
|
||||||
@@ -2817,6 +2822,7 @@ dependencies = [
|
|||||||
"hyper 0.14.30",
|
"hyper 0.14.30",
|
||||||
"itertools 0.10.5",
|
"itertools 0.10.5",
|
||||||
"jemalloc_pprof",
|
"jemalloc_pprof",
|
||||||
|
"jsonwebtoken",
|
||||||
"metrics",
|
"metrics",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"pprof",
|
"pprof",
|
||||||
@@ -4269,6 +4275,7 @@ dependencies = [
|
|||||||
"hyper 0.14.30",
|
"hyper 0.14.30",
|
||||||
"indoc",
|
"indoc",
|
||||||
"itertools 0.10.5",
|
"itertools 0.10.5",
|
||||||
|
"jsonwebtoken",
|
||||||
"md5",
|
"md5",
|
||||||
"metrics",
|
"metrics",
|
||||||
"nix 0.27.1",
|
"nix 0.27.1",
|
||||||
@@ -4345,6 +4352,7 @@ dependencies = [
|
|||||||
"humantime-serde",
|
"humantime-serde",
|
||||||
"itertools 0.10.5",
|
"itertools 0.10.5",
|
||||||
"nix 0.27.1",
|
"nix 0.27.1",
|
||||||
|
"once_cell",
|
||||||
"postgres_backend",
|
"postgres_backend",
|
||||||
"postgres_ffi",
|
"postgres_ffi",
|
||||||
"rand 0.8.5",
|
"rand 0.8.5",
|
||||||
@@ -5685,9 +5693,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ring"
|
name = "ring"
|
||||||
version = "0.17.13"
|
version = "0.17.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "70ac5d832aa16abd7d1def883a8545280c20a60f523a370aa3a9617c2b8550ee"
|
checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cc",
|
"cc",
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
@@ -5988,6 +5996,7 @@ dependencies = [
|
|||||||
"humantime",
|
"humantime",
|
||||||
"hyper 0.14.30",
|
"hyper 0.14.30",
|
||||||
"itertools 0.10.5",
|
"itertools 0.10.5",
|
||||||
|
"jsonwebtoken",
|
||||||
"metrics",
|
"metrics",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"pageserver_api",
|
"pageserver_api",
|
||||||
@@ -7872,6 +7881,7 @@ dependencies = [
|
|||||||
"metrics",
|
"metrics",
|
||||||
"nix 0.27.1",
|
"nix 0.27.1",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
|
"pem",
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
"postgres_connection",
|
"postgres_connection",
|
||||||
"pprof",
|
"pprof",
|
||||||
|
|||||||
@@ -141,6 +141,7 @@ parking_lot = "0.12"
|
|||||||
parquet = { version = "53", default-features = false, features = ["zstd"] }
|
parquet = { version = "53", default-features = false, features = ["zstd"] }
|
||||||
parquet_derive = "53"
|
parquet_derive = "53"
|
||||||
pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
|
pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
|
||||||
|
pem = "3.0.3"
|
||||||
pin-project-lite = "0.2"
|
pin-project-lite = "0.2"
|
||||||
pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "prost-codec"] }
|
pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "prost-codec"] }
|
||||||
procfs = "0.16"
|
procfs = "0.16"
|
||||||
@@ -174,6 +175,7 @@ signal-hook = "0.3"
|
|||||||
smallvec = "1.11"
|
smallvec = "1.11"
|
||||||
smol_str = { version = "0.2.0", features = ["serde"] }
|
smol_str = { version = "0.2.0", features = ["serde"] }
|
||||||
socket2 = "0.5"
|
socket2 = "0.5"
|
||||||
|
spki = "0.7.3"
|
||||||
strum = "0.26"
|
strum = "0.26"
|
||||||
strum_macros = "0.26"
|
strum_macros = "0.26"
|
||||||
"subtle" = "2.5.0"
|
"subtle" = "2.5.0"
|
||||||
|
|||||||
@@ -270,7 +270,7 @@ By default, this runs both debug and release modes, and all supported postgres v
|
|||||||
testing locally, it is convenient to run just one set of permutations, like this:
|
testing locally, it is convenient to run just one set of permutations, like this:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
DEFAULT_PG_VERSION=16 BUILD_TYPE=release ./scripts/pytest
|
DEFAULT_PG_VERSION=17 BUILD_TYPE=release ./scripts/pytest
|
||||||
```
|
```
|
||||||
|
|
||||||
## Flamegraphs
|
## Flamegraphs
|
||||||
|
|||||||
@@ -12,3 +12,5 @@ disallowed-macros = [
|
|||||||
# cannot disallow this, because clippy finds used from tokio macros
|
# cannot disallow this, because clippy finds used from tokio macros
|
||||||
#"tokio::pin",
|
#"tokio::pin",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
allow-unwrap-in-tests = true
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ index 7a4b88c..56678af 100644
|
|||||||
HEADERS = src/halfvec.h src/sparsevec.h src/vector.h
|
HEADERS = src/halfvec.h src/sparsevec.h src/vector.h
|
||||||
|
|
||||||
diff --git a/src/hnswbuild.c b/src/hnswbuild.c
|
diff --git a/src/hnswbuild.c b/src/hnswbuild.c
|
||||||
index b667478..dc95d89 100644
|
index b667478..1298aa1 100644
|
||||||
--- a/src/hnswbuild.c
|
--- a/src/hnswbuild.c
|
||||||
+++ b/src/hnswbuild.c
|
+++ b/src/hnswbuild.c
|
||||||
@@ -843,9 +843,17 @@ HnswParallelBuildMain(dsm_segment *seg, shm_toc *toc)
|
@@ -843,9 +843,17 @@ HnswParallelBuildMain(dsm_segment *seg, shm_toc *toc)
|
||||||
@@ -36,7 +36,7 @@ index b667478..dc95d89 100644
|
|||||||
/* Close relations within worker */
|
/* Close relations within worker */
|
||||||
index_close(indexRel, indexLockmode);
|
index_close(indexRel, indexLockmode);
|
||||||
table_close(heapRel, heapLockmode);
|
table_close(heapRel, heapLockmode);
|
||||||
@@ -1100,12 +1108,39 @@ BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo,
|
@@ -1100,13 +1108,25 @@ BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo,
|
||||||
SeedRandom(42);
|
SeedRandom(42);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -48,32 +48,17 @@ index b667478..dc95d89 100644
|
|||||||
|
|
||||||
BuildGraph(buildstate, forkNum);
|
BuildGraph(buildstate, forkNum);
|
||||||
|
|
||||||
- if (RelationNeedsWAL(index) || forkNum == INIT_FORKNUM)
|
|
||||||
+#ifdef NEON_SMGR
|
+#ifdef NEON_SMGR
|
||||||
+ smgr_finish_unlogged_build_phase_1(RelationGetSmgr(index));
|
+ smgr_finish_unlogged_build_phase_1(RelationGetSmgr(index));
|
||||||
+#endif
|
+#endif
|
||||||
+
|
+
|
||||||
+ if (RelationNeedsWAL(index) || forkNum == INIT_FORKNUM) {
|
if (RelationNeedsWAL(index) || forkNum == INIT_FORKNUM)
|
||||||
log_newpage_range(index, forkNum, 0, RelationGetNumberOfBlocksInFork(index, forkNum), true);
|
log_newpage_range(index, forkNum, 0, RelationGetNumberOfBlocksInFork(index, forkNum), true);
|
||||||
+#ifdef NEON_SMGR
|
|
||||||
+ {
|
|
||||||
+#if PG_VERSION_NUM >= 160000
|
|
||||||
+ RelFileLocator rlocator = RelationGetSmgr(index)->smgr_rlocator.locator;
|
|
||||||
+#else
|
|
||||||
+ RelFileNode rlocator = RelationGetSmgr(index)->smgr_rnode.node;
|
|
||||||
+#endif
|
|
||||||
+ if (set_lwlsn_block_range_hook)
|
|
||||||
+ set_lwlsn_block_range_hook(XactLastRecEnd, rlocator,
|
|
||||||
+ MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
|
|
||||||
+ if (set_lwlsn_relation_hook)
|
|
||||||
+ set_lwlsn_relation_hook(XactLastRecEnd, rlocator, MAIN_FORKNUM);
|
|
||||||
+ }
|
|
||||||
+#endif
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+#ifdef NEON_SMGR
|
+#ifdef NEON_SMGR
|
||||||
+ smgr_end_unlogged_build(RelationGetSmgr(index));
|
+ smgr_end_unlogged_build(RelationGetSmgr(index));
|
||||||
+#endif
|
+#endif
|
||||||
|
+
|
||||||
FreeBuildState(buildstate);
|
FreeBuildState(buildstate);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
diff --git a/src/ruminsert.c b/src/ruminsert.c
|
diff --git a/src/ruminsert.c b/src/ruminsert.c
|
||||||
index 255e616..7a2240f 100644
|
index 255e616..1c6edb7 100644
|
||||||
--- a/src/ruminsert.c
|
--- a/src/ruminsert.c
|
||||||
+++ b/src/ruminsert.c
|
+++ b/src/ruminsert.c
|
||||||
@@ -628,6 +628,10 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo)
|
@@ -628,6 +628,10 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo)
|
||||||
@@ -24,24 +24,12 @@ index 255e616..7a2240f 100644
|
|||||||
/*
|
/*
|
||||||
* Write index to xlog
|
* Write index to xlog
|
||||||
*/
|
*/
|
||||||
@@ -713,6 +721,22 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo)
|
@@ -713,6 +721,10 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo)
|
||||||
UnlockReleaseBuffer(buffer);
|
UnlockReleaseBuffer(buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
+#ifdef NEON_SMGR
|
+#ifdef NEON_SMGR
|
||||||
+ {
|
+ smgr_end_unlogged_build(index->rd_smgr);
|
||||||
+#if PG_VERSION_NUM >= 160000
|
|
||||||
+ RelFileLocator rlocator = RelationGetSmgr(index)->smgr_rlocator.locator;
|
|
||||||
+#else
|
|
||||||
+ RelFileNode rlocator = RelationGetSmgr(index)->smgr_rnode.node;
|
|
||||||
+#endif
|
|
||||||
+ if (set_lwlsn_block_range_hook)
|
|
||||||
+ set_lwlsn_block_range_hook(XactLastRecEnd, rlocator, MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
|
|
||||||
+ if (set_lwlsn_relation_hook)
|
|
||||||
+ set_lwlsn_relation_hook(XactLastRecEnd, rlocator, MAIN_FORKNUM);
|
|
||||||
+
|
|
||||||
+ smgr_end_unlogged_build(index->rd_smgr);
|
|
||||||
+ }
|
|
||||||
+#endif
|
+#endif
|
||||||
+
|
+
|
||||||
/*
|
/*
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ commands:
|
|||||||
- name: local_proxy
|
- name: local_proxy
|
||||||
user: postgres
|
user: postgres
|
||||||
sysvInitAction: respawn
|
sysvInitAction: respawn
|
||||||
shell: '/usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
|
shell: 'RUST_LOG="info,proxy::serverless::sql_over_http=warn" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
|
||||||
- name: postgres-exporter
|
- name: postgres-exporter
|
||||||
user: nobody
|
user: nobody
|
||||||
sysvInitAction: respawn
|
sysvInitAction: respawn
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ commands:
|
|||||||
- name: local_proxy
|
- name: local_proxy
|
||||||
user: postgres
|
user: postgres
|
||||||
sysvInitAction: respawn
|
sysvInitAction: respawn
|
||||||
shell: '/usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
|
shell: 'RUST_LOG="info,proxy::serverless::sql_over_http=warn" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
|
||||||
- name: postgres-exporter
|
- name: postgres-exporter
|
||||||
user: nobody
|
user: nobody
|
||||||
sysvInitAction: respawn
|
sysvInitAction: respawn
|
||||||
|
|||||||
@@ -57,24 +57,13 @@ use tracing::{error, info};
|
|||||||
use url::Url;
|
use url::Url;
|
||||||
use utils::failpoint_support;
|
use utils::failpoint_support;
|
||||||
|
|
||||||
// Compatibility hack: if the control plane specified any remote-ext-config
|
|
||||||
// use the default value for extension storage proxy gateway.
|
|
||||||
// Remove this once the control plane is updated to pass the gateway URL
|
|
||||||
fn parse_remote_ext_config(arg: &str) -> Result<String> {
|
|
||||||
if arg.starts_with("http") {
|
|
||||||
Ok(arg.trim_end_matches('/').to_string())
|
|
||||||
} else {
|
|
||||||
Ok("http://pg-ext-s3-gateway".to_string())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Parser)]
|
#[derive(Parser)]
|
||||||
#[command(rename_all = "kebab-case")]
|
#[command(rename_all = "kebab-case")]
|
||||||
struct Cli {
|
struct Cli {
|
||||||
#[arg(short = 'b', long, default_value = "postgres", env = "POSTGRES_PATH")]
|
#[arg(short = 'b', long, default_value = "postgres", env = "POSTGRES_PATH")]
|
||||||
pub pgbin: String,
|
pub pgbin: String,
|
||||||
|
|
||||||
#[arg(short = 'r', long, value_parser = parse_remote_ext_config)]
|
#[arg(short = 'r', long)]
|
||||||
pub remote_ext_config: Option<String>,
|
pub remote_ext_config: Option<String>,
|
||||||
|
|
||||||
/// The port to bind the external listening HTTP server to. Clients running
|
/// The port to bind the external listening HTTP server to. Clients running
|
||||||
@@ -116,9 +105,7 @@ struct Cli {
|
|||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
pub set_disk_quota_for_fs: Option<String>,
|
pub set_disk_quota_for_fs: Option<String>,
|
||||||
|
|
||||||
// TODO(tristan957): remove alias after compatibility tests are no longer
|
#[arg(short = 'c', long)]
|
||||||
// an issue
|
|
||||||
#[arg(short = 'c', long, alias = "spec-path")]
|
|
||||||
pub config: Option<OsString>,
|
pub config: Option<OsString>,
|
||||||
|
|
||||||
#[arg(short = 'i', long, group = "compute-id")]
|
#[arg(short = 'i', long, group = "compute-id")]
|
||||||
|
|||||||
@@ -6,4 +6,5 @@ pub(crate) mod request_id;
|
|||||||
pub(crate) use json::Json;
|
pub(crate) use json::Json;
|
||||||
pub(crate) use path::Path;
|
pub(crate) use path::Path;
|
||||||
pub(crate) use query::Query;
|
pub(crate) use query::Query;
|
||||||
|
#[allow(unused)]
|
||||||
pub(crate) use request_id::RequestId;
|
pub(crate) use request_id::RequestId;
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
use std::{collections::HashSet, net::SocketAddr};
|
use std::collections::HashSet;
|
||||||
|
|
||||||
use anyhow::{Result, anyhow};
|
use anyhow::{Result, anyhow};
|
||||||
use axum::{RequestExt, body::Body, extract::ConnectInfo};
|
use axum::{RequestExt, body::Body};
|
||||||
use axum_extra::{
|
use axum_extra::{
|
||||||
TypedHeader,
|
TypedHeader,
|
||||||
headers::{Authorization, authorization::Bearer},
|
headers::{Authorization, authorization::Bearer},
|
||||||
@@ -13,7 +13,7 @@ use jsonwebtoken::{Algorithm, DecodingKey, TokenData, Validation, jwk::JwkSet};
|
|||||||
use tower_http::auth::AsyncAuthorizeRequest;
|
use tower_http::auth::AsyncAuthorizeRequest;
|
||||||
use tracing::{debug, warn};
|
use tracing::{debug, warn};
|
||||||
|
|
||||||
use crate::http::{JsonResponse, extract::RequestId};
|
use crate::http::JsonResponse;
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub(in crate::http) struct Authorize {
|
pub(in crate::http) struct Authorize {
|
||||||
@@ -52,31 +52,6 @@ impl AsyncAuthorizeRequest<Body> for Authorize {
|
|||||||
let validation = self.validation.clone();
|
let validation = self.validation.clone();
|
||||||
|
|
||||||
Box::pin(async move {
|
Box::pin(async move {
|
||||||
let request_id = request.extract_parts::<RequestId>().await.unwrap();
|
|
||||||
|
|
||||||
// TODO: Remove this stanza after teaching neon_local and the
|
|
||||||
// regression tests to use a JWT + JWKS.
|
|
||||||
//
|
|
||||||
// https://github.com/neondatabase/neon/issues/11316
|
|
||||||
if cfg!(feature = "testing") {
|
|
||||||
warn!(%request_id, "Skipping compute_ctl authorization check");
|
|
||||||
|
|
||||||
return Ok(request);
|
|
||||||
}
|
|
||||||
|
|
||||||
let connect_info = request
|
|
||||||
.extract_parts::<ConnectInfo<SocketAddr>>()
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
// In the event the request is coming from the loopback interface,
|
|
||||||
// allow all requests
|
|
||||||
if connect_info.ip().is_loopback() {
|
|
||||||
warn!(%request_id, "Bypassed authorization because request is coming from the loopback interface");
|
|
||||||
|
|
||||||
return Ok(request);
|
|
||||||
}
|
|
||||||
|
|
||||||
let TypedHeader(Authorization(bearer)) = request
|
let TypedHeader(Authorization(bearer)) = request
|
||||||
.extract_parts::<TypedHeader<Authorization<Bearer>>>()
|
.extract_parts::<TypedHeader<Authorization<Bearer>>>()
|
||||||
.await
|
.await
|
||||||
@@ -112,6 +87,8 @@ impl Authorize {
|
|||||||
token: &str,
|
token: &str,
|
||||||
validation: &Validation,
|
validation: &Validation,
|
||||||
) -> Result<TokenData<ComputeClaims>> {
|
) -> Result<TokenData<ComputeClaims>> {
|
||||||
|
debug_assert!(!jwks.keys.is_empty());
|
||||||
|
|
||||||
debug!("verifying token {}", token);
|
debug!("verifying token {}", token);
|
||||||
|
|
||||||
for jwk in jwks.keys.iter() {
|
for jwk in jwks.keys.iter() {
|
||||||
|
|||||||
@@ -6,13 +6,16 @@ license.workspace = true
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow.workspace = true
|
anyhow.workspace = true
|
||||||
|
base64.workspace = true
|
||||||
camino.workspace = true
|
camino.workspace = true
|
||||||
clap.workspace = true
|
clap.workspace = true
|
||||||
comfy-table.workspace = true
|
comfy-table.workspace = true
|
||||||
futures.workspace = true
|
futures.workspace = true
|
||||||
humantime.workspace = true
|
humantime.workspace = true
|
||||||
|
jsonwebtoken.workspace = true
|
||||||
nix.workspace = true
|
nix.workspace = true
|
||||||
once_cell.workspace = true
|
once_cell.workspace = true
|
||||||
|
pem.workspace = true
|
||||||
humantime-serde.workspace = true
|
humantime-serde.workspace = true
|
||||||
hyper0.workspace = true
|
hyper0.workspace = true
|
||||||
regex.workspace = true
|
regex.workspace = true
|
||||||
@@ -20,6 +23,8 @@ reqwest = { workspace = true, features = ["blocking", "json"] }
|
|||||||
scopeguard.workspace = true
|
scopeguard.workspace = true
|
||||||
serde.workspace = true
|
serde.workspace = true
|
||||||
serde_json.workspace = true
|
serde_json.workspace = true
|
||||||
|
sha2.workspace = true
|
||||||
|
spki.workspace = true
|
||||||
thiserror.workspace = true
|
thiserror.workspace = true
|
||||||
toml.workspace = true
|
toml.workspace = true
|
||||||
toml_edit.workspace = true
|
toml_edit.workspace = true
|
||||||
|
|||||||
@@ -63,7 +63,7 @@ const DEFAULT_PAGESERVER_ID: NodeId = NodeId(1);
|
|||||||
const DEFAULT_BRANCH_NAME: &str = "main";
|
const DEFAULT_BRANCH_NAME: &str = "main";
|
||||||
project_git_version!(GIT_VERSION);
|
project_git_version!(GIT_VERSION);
|
||||||
|
|
||||||
const DEFAULT_PG_VERSION: u32 = 16;
|
const DEFAULT_PG_VERSION: u32 = 17;
|
||||||
|
|
||||||
const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/upcall/v1/";
|
const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/upcall/v1/";
|
||||||
|
|
||||||
@@ -552,6 +552,7 @@ enum EndpointCmd {
|
|||||||
Start(EndpointStartCmdArgs),
|
Start(EndpointStartCmdArgs),
|
||||||
Reconfigure(EndpointReconfigureCmdArgs),
|
Reconfigure(EndpointReconfigureCmdArgs),
|
||||||
Stop(EndpointStopCmdArgs),
|
Stop(EndpointStopCmdArgs),
|
||||||
|
GenerateJwt(EndpointGenerateJwtCmdArgs),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(clap::Args)]
|
#[derive(clap::Args)]
|
||||||
@@ -699,6 +700,13 @@ struct EndpointStopCmdArgs {
|
|||||||
mode: String,
|
mode: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(clap::Args)]
|
||||||
|
#[clap(about = "Generate a JWT for an endpoint")]
|
||||||
|
struct EndpointGenerateJwtCmdArgs {
|
||||||
|
#[clap(help = "Postgres endpoint id")]
|
||||||
|
endpoint_id: String,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(clap::Subcommand)]
|
#[derive(clap::Subcommand)]
|
||||||
#[clap(about = "Manage neon_local branch name mappings")]
|
#[clap(about = "Manage neon_local branch name mappings")]
|
||||||
enum MappingsCmd {
|
enum MappingsCmd {
|
||||||
@@ -1528,6 +1536,16 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
|||||||
.with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
|
.with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
|
||||||
endpoint.stop(&args.mode, args.destroy)?;
|
endpoint.stop(&args.mode, args.destroy)?;
|
||||||
}
|
}
|
||||||
|
EndpointCmd::GenerateJwt(args) => {
|
||||||
|
let endpoint_id = &args.endpoint_id;
|
||||||
|
let endpoint = cplane
|
||||||
|
.endpoints
|
||||||
|
.get(endpoint_id)
|
||||||
|
.with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
|
||||||
|
let jwt = endpoint.generate_jwt()?;
|
||||||
|
|
||||||
|
print!("{jwt}");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
@@ -42,22 +42,30 @@ use std::path::PathBuf;
|
|||||||
use std::process::Command;
|
use std::process::Command;
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
use anyhow::{Context, Result, anyhow, bail};
|
use anyhow::{Context, Result, anyhow, bail};
|
||||||
use compute_api::requests::ConfigurationRequest;
|
use compute_api::requests::{ComputeClaims, ConfigurationRequest};
|
||||||
use compute_api::responses::{
|
use compute_api::responses::{
|
||||||
ComputeConfig, ComputeCtlConfig, ComputeStatus, ComputeStatusResponse,
|
ComputeConfig, ComputeCtlConfig, ComputeStatus, ComputeStatusResponse, TlsConfig,
|
||||||
};
|
};
|
||||||
use compute_api::spec::{
|
use compute_api::spec::{
|
||||||
Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PgIdent,
|
Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PgIdent,
|
||||||
RemoteExtSpec, Role,
|
RemoteExtSpec, Role,
|
||||||
};
|
};
|
||||||
|
use jsonwebtoken::jwk::{
|
||||||
|
AlgorithmParameters, CommonParameters, EllipticCurve, Jwk, JwkSet, KeyAlgorithm, KeyOperations,
|
||||||
|
OctetKeyPairParameters, OctetKeyPairType, PublicKeyUse,
|
||||||
|
};
|
||||||
use nix::sys::signal::{Signal, kill};
|
use nix::sys::signal::{Signal, kill};
|
||||||
use pageserver_api::shard::ShardStripeSize;
|
use pageserver_api::shard::ShardStripeSize;
|
||||||
|
use pem::Pem;
|
||||||
use reqwest::header::CONTENT_TYPE;
|
use reqwest::header::CONTENT_TYPE;
|
||||||
use safekeeper_api::membership::SafekeeperGeneration;
|
use safekeeper_api::membership::SafekeeperGeneration;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
use sha2::{Digest, Sha256};
|
||||||
|
use spki::der::Decode;
|
||||||
|
use spki::{SubjectPublicKeyInfo, SubjectPublicKeyInfoRef};
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
use url::Host;
|
use url::Host;
|
||||||
use utils::id::{NodeId, TenantId, TimelineId};
|
use utils::id::{NodeId, TenantId, TimelineId};
|
||||||
@@ -82,6 +90,7 @@ pub struct EndpointConf {
|
|||||||
drop_subscriptions_before_start: bool,
|
drop_subscriptions_before_start: bool,
|
||||||
features: Vec<ComputeFeature>,
|
features: Vec<ComputeFeature>,
|
||||||
cluster: Option<Cluster>,
|
cluster: Option<Cluster>,
|
||||||
|
compute_ctl_config: ComputeCtlConfig,
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -137,6 +146,37 @@ impl ComputeControlPlane {
|
|||||||
.unwrap_or(self.base_port)
|
.unwrap_or(self.base_port)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Create a JSON Web Key Set. This ideally matches the way we create a JWKS
|
||||||
|
/// from the production control plane.
|
||||||
|
fn create_jwks_from_pem(pem: &Pem) -> Result<JwkSet> {
|
||||||
|
let spki: SubjectPublicKeyInfoRef = SubjectPublicKeyInfo::from_der(pem.contents())?;
|
||||||
|
let public_key = spki.subject_public_key.raw_bytes();
|
||||||
|
|
||||||
|
let mut hasher = Sha256::new();
|
||||||
|
hasher.update(public_key);
|
||||||
|
let key_hash = hasher.finalize();
|
||||||
|
|
||||||
|
Ok(JwkSet {
|
||||||
|
keys: vec![Jwk {
|
||||||
|
common: CommonParameters {
|
||||||
|
public_key_use: Some(PublicKeyUse::Signature),
|
||||||
|
key_operations: Some(vec![KeyOperations::Verify]),
|
||||||
|
key_algorithm: Some(KeyAlgorithm::EdDSA),
|
||||||
|
key_id: Some(base64::encode_config(key_hash, base64::URL_SAFE_NO_PAD)),
|
||||||
|
x509_url: None::<String>,
|
||||||
|
x509_chain: None::<Vec<String>>,
|
||||||
|
x509_sha1_fingerprint: None::<String>,
|
||||||
|
x509_sha256_fingerprint: None::<String>,
|
||||||
|
},
|
||||||
|
algorithm: AlgorithmParameters::OctetKeyPair(OctetKeyPairParameters {
|
||||||
|
key_type: OctetKeyPairType::OctetKeyPair,
|
||||||
|
curve: EllipticCurve::Ed25519,
|
||||||
|
x: base64::encode_config(public_key, base64::URL_SAFE_NO_PAD),
|
||||||
|
}),
|
||||||
|
}],
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
pub fn new_endpoint(
|
pub fn new_endpoint(
|
||||||
&mut self,
|
&mut self,
|
||||||
@@ -154,6 +194,10 @@ impl ComputeControlPlane {
|
|||||||
let pg_port = pg_port.unwrap_or_else(|| self.get_port());
|
let pg_port = pg_port.unwrap_or_else(|| self.get_port());
|
||||||
let external_http_port = external_http_port.unwrap_or_else(|| self.get_port() + 1);
|
let external_http_port = external_http_port.unwrap_or_else(|| self.get_port() + 1);
|
||||||
let internal_http_port = internal_http_port.unwrap_or_else(|| external_http_port + 1);
|
let internal_http_port = internal_http_port.unwrap_or_else(|| external_http_port + 1);
|
||||||
|
let compute_ctl_config = ComputeCtlConfig {
|
||||||
|
jwks: Self::create_jwks_from_pem(&self.env.read_public_key()?)?,
|
||||||
|
tls: None::<TlsConfig>,
|
||||||
|
};
|
||||||
let ep = Arc::new(Endpoint {
|
let ep = Arc::new(Endpoint {
|
||||||
endpoint_id: endpoint_id.to_owned(),
|
endpoint_id: endpoint_id.to_owned(),
|
||||||
pg_address: SocketAddr::new(IpAddr::from(Ipv4Addr::LOCALHOST), pg_port),
|
pg_address: SocketAddr::new(IpAddr::from(Ipv4Addr::LOCALHOST), pg_port),
|
||||||
@@ -181,6 +225,7 @@ impl ComputeControlPlane {
|
|||||||
reconfigure_concurrency: 1,
|
reconfigure_concurrency: 1,
|
||||||
features: vec![],
|
features: vec![],
|
||||||
cluster: None,
|
cluster: None,
|
||||||
|
compute_ctl_config: compute_ctl_config.clone(),
|
||||||
});
|
});
|
||||||
|
|
||||||
ep.create_endpoint_dir()?;
|
ep.create_endpoint_dir()?;
|
||||||
@@ -200,6 +245,7 @@ impl ComputeControlPlane {
|
|||||||
reconfigure_concurrency: 1,
|
reconfigure_concurrency: 1,
|
||||||
features: vec![],
|
features: vec![],
|
||||||
cluster: None,
|
cluster: None,
|
||||||
|
compute_ctl_config,
|
||||||
})?,
|
})?,
|
||||||
)?;
|
)?;
|
||||||
std::fs::write(
|
std::fs::write(
|
||||||
@@ -242,7 +288,6 @@ impl ComputeControlPlane {
|
|||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct Endpoint {
|
pub struct Endpoint {
|
||||||
/// used as the directory name
|
/// used as the directory name
|
||||||
endpoint_id: String,
|
endpoint_id: String,
|
||||||
@@ -271,6 +316,9 @@ pub struct Endpoint {
|
|||||||
features: Vec<ComputeFeature>,
|
features: Vec<ComputeFeature>,
|
||||||
// Cluster settings
|
// Cluster settings
|
||||||
cluster: Option<Cluster>,
|
cluster: Option<Cluster>,
|
||||||
|
|
||||||
|
/// The compute_ctl config for the endpoint's compute.
|
||||||
|
compute_ctl_config: ComputeCtlConfig,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(PartialEq, Eq)]
|
#[derive(PartialEq, Eq)]
|
||||||
@@ -333,6 +381,7 @@ impl Endpoint {
|
|||||||
drop_subscriptions_before_start: conf.drop_subscriptions_before_start,
|
drop_subscriptions_before_start: conf.drop_subscriptions_before_start,
|
||||||
features: conf.features,
|
features: conf.features,
|
||||||
cluster: conf.cluster,
|
cluster: conf.cluster,
|
||||||
|
compute_ctl_config: conf.compute_ctl_config,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -580,6 +629,13 @@ impl Endpoint {
|
|||||||
Ok(safekeeper_connstrings)
|
Ok(safekeeper_connstrings)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Generate a JWT with the correct claims.
|
||||||
|
pub fn generate_jwt(&self) -> Result<String> {
|
||||||
|
self.env.generate_auth_token(&ComputeClaims {
|
||||||
|
compute_id: self.endpoint_id.clone(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
pub async fn start(
|
pub async fn start(
|
||||||
&self,
|
&self,
|
||||||
@@ -706,14 +762,10 @@ impl Endpoint {
|
|||||||
|
|
||||||
ComputeConfig {
|
ComputeConfig {
|
||||||
spec: Some(spec),
|
spec: Some(spec),
|
||||||
compute_ctl_config: ComputeCtlConfig::default(),
|
compute_ctl_config: self.compute_ctl_config.clone(),
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// TODO(tristan957): Remove the write to spec.json after compatibility
|
|
||||||
// tests work themselves out
|
|
||||||
let spec_path = self.endpoint_path().join("spec.json");
|
|
||||||
std::fs::write(spec_path, serde_json::to_string_pretty(&config.spec)?)?;
|
|
||||||
let config_path = self.endpoint_path().join("config.json");
|
let config_path = self.endpoint_path().join("config.json");
|
||||||
std::fs::write(config_path, serde_json::to_string_pretty(&config)?)?;
|
std::fs::write(config_path, serde_json::to_string_pretty(&config)?)?;
|
||||||
|
|
||||||
@@ -723,16 +775,6 @@ impl Endpoint {
|
|||||||
.append(true)
|
.append(true)
|
||||||
.open(self.endpoint_path().join("compute.log"))?;
|
.open(self.endpoint_path().join("compute.log"))?;
|
||||||
|
|
||||||
// TODO(tristan957): Remove when compatibility tests are no longer an
|
|
||||||
// issue
|
|
||||||
let old_compute_ctl = {
|
|
||||||
let mut cmd = Command::new(self.env.neon_distrib_dir.join("compute_ctl"));
|
|
||||||
let help_output = cmd.arg("--help").output()?;
|
|
||||||
let help_output = String::from_utf8_lossy(&help_output.stdout);
|
|
||||||
|
|
||||||
!help_output.contains("--config")
|
|
||||||
};
|
|
||||||
|
|
||||||
// Launch compute_ctl
|
// Launch compute_ctl
|
||||||
let conn_str = self.connstr("cloud_admin", "postgres");
|
let conn_str = self.connstr("cloud_admin", "postgres");
|
||||||
println!("Starting postgres node at '{}'", conn_str);
|
println!("Starting postgres node at '{}'", conn_str);
|
||||||
@@ -751,19 +793,8 @@ impl Endpoint {
|
|||||||
])
|
])
|
||||||
.args(["--pgdata", self.pgdata().to_str().unwrap()])
|
.args(["--pgdata", self.pgdata().to_str().unwrap()])
|
||||||
.args(["--connstr", &conn_str])
|
.args(["--connstr", &conn_str])
|
||||||
// TODO(tristan957): Change this to --config when compatibility tests
|
.arg("--config")
|
||||||
// are no longer an issue
|
.arg(self.endpoint_path().join("config.json").as_os_str())
|
||||||
.args([
|
|
||||||
"--spec-path",
|
|
||||||
self.endpoint_path()
|
|
||||||
.join(if old_compute_ctl {
|
|
||||||
"spec.json"
|
|
||||||
} else {
|
|
||||||
"config.json"
|
|
||||||
})
|
|
||||||
.to_str()
|
|
||||||
.unwrap(),
|
|
||||||
])
|
|
||||||
.args([
|
.args([
|
||||||
"--pgbin",
|
"--pgbin",
|
||||||
self.env
|
self.env
|
||||||
@@ -774,16 +805,7 @@ impl Endpoint {
|
|||||||
])
|
])
|
||||||
// TODO: It would be nice if we generated compute IDs with the same
|
// TODO: It would be nice if we generated compute IDs with the same
|
||||||
// algorithm as the real control plane.
|
// algorithm as the real control plane.
|
||||||
.args([
|
.args(["--compute-id", &self.endpoint_id])
|
||||||
"--compute-id",
|
|
||||||
&format!(
|
|
||||||
"compute-{}",
|
|
||||||
SystemTime::now()
|
|
||||||
.duration_since(UNIX_EPOCH)
|
|
||||||
.unwrap()
|
|
||||||
.as_secs()
|
|
||||||
),
|
|
||||||
])
|
|
||||||
.stdin(std::process::Stdio::null())
|
.stdin(std::process::Stdio::null())
|
||||||
.stderr(logfile.try_clone()?)
|
.stderr(logfile.try_clone()?)
|
||||||
.stdout(logfile);
|
.stdout(logfile);
|
||||||
@@ -881,6 +903,7 @@ impl Endpoint {
|
|||||||
self.external_http_address.port()
|
self.external_http_address.port()
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
.bearer_auth(self.generate_jwt()?)
|
||||||
.send()
|
.send()
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
@@ -957,6 +980,7 @@ impl Endpoint {
|
|||||||
self.external_http_address.port()
|
self.external_http_address.port()
|
||||||
))
|
))
|
||||||
.header(CONTENT_TYPE.as_str(), "application/json")
|
.header(CONTENT_TYPE.as_str(), "application/json")
|
||||||
|
.bearer_auth(self.generate_jwt()?)
|
||||||
.body(
|
.body(
|
||||||
serde_json::to_string(&ConfigurationRequest {
|
serde_json::to_string(&ConfigurationRequest {
|
||||||
spec,
|
spec,
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ use std::{env, fs};
|
|||||||
|
|
||||||
use anyhow::{Context, bail};
|
use anyhow::{Context, bail};
|
||||||
use clap::ValueEnum;
|
use clap::ValueEnum;
|
||||||
|
use pem::Pem;
|
||||||
use postgres_backend::AuthType;
|
use postgres_backend::AuthType;
|
||||||
use reqwest::Url;
|
use reqwest::Url;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
@@ -22,7 +23,7 @@ use crate::object_storage::{OBJECT_STORAGE_REMOTE_STORAGE_DIR, ObjectStorage};
|
|||||||
use crate::pageserver::{PAGESERVER_REMOTE_STORAGE_DIR, PageServerNode};
|
use crate::pageserver::{PAGESERVER_REMOTE_STORAGE_DIR, PageServerNode};
|
||||||
use crate::safekeeper::SafekeeperNode;
|
use crate::safekeeper::SafekeeperNode;
|
||||||
|
|
||||||
pub const DEFAULT_PG_VERSION: u32 = 16;
|
pub const DEFAULT_PG_VERSION: u32 = 17;
|
||||||
|
|
||||||
//
|
//
|
||||||
// This data structures represents neon_local CLI config
|
// This data structures represents neon_local CLI config
|
||||||
@@ -56,6 +57,7 @@ pub struct LocalEnv {
|
|||||||
|
|
||||||
// used to issue tokens during e.g pg start
|
// used to issue tokens during e.g pg start
|
||||||
pub private_key_path: PathBuf,
|
pub private_key_path: PathBuf,
|
||||||
|
/// Path to environment's public key
|
||||||
pub public_key_path: PathBuf,
|
pub public_key_path: PathBuf,
|
||||||
|
|
||||||
pub broker: NeonBroker,
|
pub broker: NeonBroker,
|
||||||
@@ -758,11 +760,11 @@ impl LocalEnv {
|
|||||||
|
|
||||||
// this function is used only for testing purposes in CLI e g generate tokens during init
|
// this function is used only for testing purposes in CLI e g generate tokens during init
|
||||||
pub fn generate_auth_token<S: Serialize>(&self, claims: &S) -> anyhow::Result<String> {
|
pub fn generate_auth_token<S: Serialize>(&self, claims: &S) -> anyhow::Result<String> {
|
||||||
let private_key_path = self.get_private_key_path();
|
let key = self.read_private_key()?;
|
||||||
let key_data = fs::read(private_key_path)?;
|
encode_from_key_file(claims, &key)
|
||||||
encode_from_key_file(claims, &key_data)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get the path to the private key.
|
||||||
pub fn get_private_key_path(&self) -> PathBuf {
|
pub fn get_private_key_path(&self) -> PathBuf {
|
||||||
if self.private_key_path.is_absolute() {
|
if self.private_key_path.is_absolute() {
|
||||||
self.private_key_path.to_path_buf()
|
self.private_key_path.to_path_buf()
|
||||||
@@ -771,6 +773,29 @@ impl LocalEnv {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get the path to the public key.
|
||||||
|
pub fn get_public_key_path(&self) -> PathBuf {
|
||||||
|
if self.public_key_path.is_absolute() {
|
||||||
|
self.public_key_path.to_path_buf()
|
||||||
|
} else {
|
||||||
|
self.base_data_dir.join(&self.public_key_path)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read the contents of the private key file.
|
||||||
|
pub fn read_private_key(&self) -> anyhow::Result<Pem> {
|
||||||
|
let private_key_path = self.get_private_key_path();
|
||||||
|
let pem = pem::parse(fs::read(private_key_path)?)?;
|
||||||
|
Ok(pem)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read the contents of the public key file.
|
||||||
|
pub fn read_public_key(&self) -> anyhow::Result<Pem> {
|
||||||
|
let public_key_path = self.get_public_key_path();
|
||||||
|
let pem = pem::parse(fs::read(public_key_path)?)?;
|
||||||
|
Ok(pem)
|
||||||
|
}
|
||||||
|
|
||||||
/// Materialize the [`NeonLocalInitConf`] to disk. Called during [`neon_local init`].
|
/// Materialize the [`NeonLocalInitConf`] to disk. Called during [`neon_local init`].
|
||||||
pub fn init(conf: NeonLocalInitConf, force: &InitForceMode) -> anyhow::Result<()> {
|
pub fn init(conf: NeonLocalInitConf, force: &InitForceMode) -> anyhow::Result<()> {
|
||||||
let base_path = base_path();
|
let base_path = base_path();
|
||||||
@@ -956,6 +981,7 @@ fn generate_auth_keys(private_key_path: &Path, public_key_path: &Path) -> anyhow
|
|||||||
String::from_utf8_lossy(&keygen_output.stderr)
|
String::from_utf8_lossy(&keygen_output.stderr)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extract the public key from the private key file
|
// Extract the public key from the private key file
|
||||||
//
|
//
|
||||||
// openssl pkey -in auth_private_key.pem -pubout -out auth_public_key.pem
|
// openssl pkey -in auth_private_key.pem -pubout -out auth_public_key.pem
|
||||||
@@ -972,6 +998,7 @@ fn generate_auth_keys(private_key_path: &Path, public_key_path: &Path) -> anyhow
|
|||||||
String::from_utf8_lossy(&keygen_output.stderr)
|
String::from_utf8_lossy(&keygen_output.stderr)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -413,6 +413,11 @@ impl PageServerNode {
|
|||||||
.map(serde_json::from_str)
|
.map(serde_json::from_str)
|
||||||
.transpose()
|
.transpose()
|
||||||
.context("Failed to parse 'compaction_algorithm' json")?,
|
.context("Failed to parse 'compaction_algorithm' json")?,
|
||||||
|
compaction_shard_ancestor: settings
|
||||||
|
.remove("compaction_shard_ancestor")
|
||||||
|
.map(|x| x.parse::<bool>())
|
||||||
|
.transpose()
|
||||||
|
.context("Failed to parse 'compaction_shard_ancestor' as a bool")?,
|
||||||
compaction_l0_first: settings
|
compaction_l0_first: settings
|
||||||
.remove("compaction_l0_first")
|
.remove("compaction_l0_first")
|
||||||
.map(|x| x.parse::<bool>())
|
.map(|x| x.parse::<bool>())
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ use pageserver_api::models::{
|
|||||||
};
|
};
|
||||||
use pageserver_api::shard::TenantShardId;
|
use pageserver_api::shard::TenantShardId;
|
||||||
use pageserver_client::mgmt_api::ResponseErrorMessageExt;
|
use pageserver_client::mgmt_api::ResponseErrorMessageExt;
|
||||||
|
use pem::Pem;
|
||||||
use postgres_backend::AuthType;
|
use postgres_backend::AuthType;
|
||||||
use reqwest::{Certificate, Method};
|
use reqwest::{Certificate, Method};
|
||||||
use serde::de::DeserializeOwned;
|
use serde::de::DeserializeOwned;
|
||||||
@@ -34,8 +35,8 @@ use crate::local_env::{LocalEnv, NeonStorageControllerConf};
|
|||||||
|
|
||||||
pub struct StorageController {
|
pub struct StorageController {
|
||||||
env: LocalEnv,
|
env: LocalEnv,
|
||||||
private_key: Option<Vec<u8>>,
|
private_key: Option<Pem>,
|
||||||
public_key: Option<String>,
|
public_key: Option<Pem>,
|
||||||
client: reqwest::Client,
|
client: reqwest::Client,
|
||||||
config: NeonStorageControllerConf,
|
config: NeonStorageControllerConf,
|
||||||
|
|
||||||
@@ -116,7 +117,9 @@ impl StorageController {
|
|||||||
AuthType::Trust => (None, None),
|
AuthType::Trust => (None, None),
|
||||||
AuthType::NeonJWT => {
|
AuthType::NeonJWT => {
|
||||||
let private_key_path = env.get_private_key_path();
|
let private_key_path = env.get_private_key_path();
|
||||||
let private_key = fs::read(private_key_path).expect("failed to read private key");
|
let private_key =
|
||||||
|
pem::parse(fs::read(private_key_path).expect("failed to read private key"))
|
||||||
|
.expect("failed to parse PEM file");
|
||||||
|
|
||||||
// If pageserver auth is enabled, this implicitly enables auth for this service,
|
// If pageserver auth is enabled, this implicitly enables auth for this service,
|
||||||
// using the same credentials.
|
// using the same credentials.
|
||||||
@@ -138,9 +141,13 @@ impl StorageController {
|
|||||||
.expect("Empty key dir")
|
.expect("Empty key dir")
|
||||||
.expect("Error reading key dir");
|
.expect("Error reading key dir");
|
||||||
|
|
||||||
std::fs::read_to_string(dent.path()).expect("Can't read public key")
|
pem::parse(std::fs::read_to_string(dent.path()).expect("Can't read public key"))
|
||||||
|
.expect("Failed to parse PEM file")
|
||||||
} else {
|
} else {
|
||||||
std::fs::read_to_string(&public_key_path).expect("Can't read public key")
|
pem::parse(
|
||||||
|
std::fs::read_to_string(&public_key_path).expect("Can't read public key"),
|
||||||
|
)
|
||||||
|
.expect("Failed to parse PEM file")
|
||||||
};
|
};
|
||||||
(Some(private_key), Some(public_key))
|
(Some(private_key), Some(public_key))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
|
|
||||||
# Example docker compose configuration
|
# Example docker compose configuration
|
||||||
|
|
||||||
The configuration in this directory is used for testing Neon docker images: it is
|
The configuration in this directory is used for testing Neon docker images: it is
|
||||||
@@ -8,3 +7,13 @@ you can experiment with a miniature Neon system, use `cargo neon` rather than co
|
|||||||
This configuration does not start the storage controller, because the controller
|
This configuration does not start the storage controller, because the controller
|
||||||
needs a way to reconfigure running computes, and no such thing exists in this setup.
|
needs a way to reconfigure running computes, and no such thing exists in this setup.
|
||||||
|
|
||||||
|
## Generating the JWKS for a compute
|
||||||
|
|
||||||
|
```shell
|
||||||
|
openssl genpkey -algorithm Ed25519 -out private-key.pem
|
||||||
|
openssl pkey -in private-key.pem -pubout -out public-key.pem
|
||||||
|
openssl pkey -pubin -inform pem -in public-key.pem -pubout -outform der -out public-key.der
|
||||||
|
key="$(xxd -plain -cols 32 -s -32 public-key.der)"
|
||||||
|
key_id="$(printf '%s' "$key" | sha256sum | awk '{ print $1 }' | basenc --base64url --wrap=0)"
|
||||||
|
x="$(printf '%s' "$key" | basenc --base64url --wrap=0)"
|
||||||
|
```
|
||||||
|
|||||||
3
docker-compose/compute_wrapper/private-key.pem
Normal file
3
docker-compose/compute_wrapper/private-key.pem
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
-----BEGIN PRIVATE KEY-----
|
||||||
|
MC4CAQAwBQYDK2VwBCIEIOmnRbzt2AJ0d+S3aU1hiYOl/tXpvz1FmWBfwHYBgOma
|
||||||
|
-----END PRIVATE KEY-----
|
||||||
BIN
docker-compose/compute_wrapper/public-key.der
Normal file
BIN
docker-compose/compute_wrapper/public-key.der
Normal file
Binary file not shown.
3
docker-compose/compute_wrapper/public-key.pem
Normal file
3
docker-compose/compute_wrapper/public-key.pem
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
-----BEGIN PUBLIC KEY-----
|
||||||
|
MCowBQYDK2VwAyEADY0al/U0bgB3+9fUGk+3PKWnsck9OyxN5DjHIN6Xep0=
|
||||||
|
-----END PUBLIC KEY-----
|
||||||
@@ -81,19 +81,9 @@ sed -i "s/TIMELINE_ID/${timeline_id}/" ${CONFIG_FILE}
|
|||||||
|
|
||||||
cat ${CONFIG_FILE}
|
cat ${CONFIG_FILE}
|
||||||
|
|
||||||
# TODO(tristan957): Remove these workarounds for backwards compatibility after
|
|
||||||
# the next compute release. That includes these next few lines and the
|
|
||||||
# --spec-path in the compute_ctl invocation.
|
|
||||||
if compute_ctl --help | grep --quiet -- '--config'; then
|
|
||||||
SPEC_PATH="$CONFIG_FILE"
|
|
||||||
else
|
|
||||||
jq '.spec' < "$CONFIG_FILE" > /tmp/spec.json
|
|
||||||
SPEC_PATH=/tmp/spec.json
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Start compute node"
|
echo "Start compute node"
|
||||||
/usr/local/bin/compute_ctl --pgdata /var/db/postgres/compute \
|
/usr/local/bin/compute_ctl --pgdata /var/db/postgres/compute \
|
||||||
-C "postgresql://cloud_admin@localhost:55433/postgres" \
|
-C "postgresql://cloud_admin@localhost:55433/postgres" \
|
||||||
-b /usr/local/bin/postgres \
|
-b /usr/local/bin/postgres \
|
||||||
--compute-id "compute-$RANDOM" \
|
--compute-id "compute-$RANDOM" \
|
||||||
--spec-path "$SPEC_PATH"
|
--config "$CONFIG_FILE"
|
||||||
|
|||||||
@@ -142,7 +142,19 @@
|
|||||||
},
|
},
|
||||||
"compute_ctl_config": {
|
"compute_ctl_config": {
|
||||||
"jwks": {
|
"jwks": {
|
||||||
"keys": []
|
"keys": [
|
||||||
|
{
|
||||||
|
"use": "sig",
|
||||||
|
"key_ops": [
|
||||||
|
"verify"
|
||||||
|
],
|
||||||
|
"alg": "EdDSA",
|
||||||
|
"kid": "ZGIxMzAzOGY0YWQwODk2ODU1MTk1NzMxMDFkYmUyOWU2NzZkOWNjNjMyMGRkZGJjOWY0MjdjYWVmNzE1MjUyOAo=",
|
||||||
|
"kty": "OKP",
|
||||||
|
"crv": "Ed25519",
|
||||||
|
"x": "MGQ4ZDFhOTdmNTM0NmUwMDc3ZmJkN2Q0MWE0ZmI3M2NhNWE3YjFjOTNkM2IyYzRkZTQzOGM3MjBkZTk3N2E5ZAo="
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
8
docker-compose/ext-src/pg_jsonschema-src/Makefile
Normal file
8
docker-compose/ext-src/pg_jsonschema-src/Makefile
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
EXTENSION = pg_jsonschema
|
||||||
|
DATA = pg_jsonschema--1.0.sql
|
||||||
|
REGRESS = jsonschema_valid_api jsonschema_edge_cases
|
||||||
|
REGRESS_OPTS = --load-extension=pg_jsonschema
|
||||||
|
|
||||||
|
PG_CONFIG ?= pg_config
|
||||||
|
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||||
|
include $(PGXS)
|
||||||
@@ -0,0 +1,87 @@
|
|||||||
|
-- Schema with enums, nulls, extra properties disallowed
|
||||||
|
SELECT jsonschema_is_valid('{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"status": { "type": "string", "enum": ["active", "inactive", "pending"] },
|
||||||
|
"email": { "type": ["string", "null"], "format": "email" }
|
||||||
|
},
|
||||||
|
"required": ["status"],
|
||||||
|
"additionalProperties": false
|
||||||
|
}'::json);
|
||||||
|
jsonschema_is_valid
|
||||||
|
---------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Valid enum and null email
|
||||||
|
SELECT jsonschema_validation_errors(
|
||||||
|
'{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"status": { "type": "string", "enum": ["active", "inactive", "pending"] },
|
||||||
|
"email": { "type": ["string", "null"], "format": "email" }
|
||||||
|
},
|
||||||
|
"required": ["status"],
|
||||||
|
"additionalProperties": false
|
||||||
|
}'::json,
|
||||||
|
'{"status": "active", "email": null}'::json
|
||||||
|
);
|
||||||
|
jsonschema_validation_errors
|
||||||
|
------------------------------
|
||||||
|
{}
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Invalid enum value
|
||||||
|
SELECT jsonschema_validation_errors(
|
||||||
|
'{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"status": { "type": "string", "enum": ["active", "inactive", "pending"] },
|
||||||
|
"email": { "type": ["string", "null"], "format": "email" }
|
||||||
|
},
|
||||||
|
"required": ["status"],
|
||||||
|
"additionalProperties": false
|
||||||
|
}'::json,
|
||||||
|
'{"status": "disabled", "email": null}'::json
|
||||||
|
);
|
||||||
|
jsonschema_validation_errors
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
{"\"disabled\" is not one of [\"active\",\"inactive\",\"pending\"]"}
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Invalid email format (assuming format is validated)
|
||||||
|
SELECT jsonschema_validation_errors(
|
||||||
|
'{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"status": { "type": "string", "enum": ["active", "inactive", "pending"] },
|
||||||
|
"email": { "type": ["string", "null"], "format": "email" }
|
||||||
|
},
|
||||||
|
"required": ["status"],
|
||||||
|
"additionalProperties": false
|
||||||
|
}'::json,
|
||||||
|
'{"status": "active", "email": "not-an-email"}'::json
|
||||||
|
);
|
||||||
|
jsonschema_validation_errors
|
||||||
|
-----------------------------------------
|
||||||
|
{"\"not-an-email\" is not a \"email\""}
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Extra property not allowed
|
||||||
|
SELECT jsonschema_validation_errors(
|
||||||
|
'{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"status": { "type": "string", "enum": ["active", "inactive", "pending"] },
|
||||||
|
"email": { "type": ["string", "null"], "format": "email" }
|
||||||
|
},
|
||||||
|
"required": ["status"],
|
||||||
|
"additionalProperties": false
|
||||||
|
}'::json,
|
||||||
|
'{"status": "active", "extra": "should not be here"}'::json
|
||||||
|
);
|
||||||
|
jsonschema_validation_errors
|
||||||
|
--------------------------------------------------------------------
|
||||||
|
{"Additional properties are not allowed ('extra' was unexpected)"}
|
||||||
|
(1 row)
|
||||||
|
|
||||||
@@ -0,0 +1,65 @@
|
|||||||
|
-- Define schema
|
||||||
|
SELECT jsonschema_is_valid('{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"username": { "type": "string" },
|
||||||
|
"age": { "type": "integer" }
|
||||||
|
},
|
||||||
|
"required": ["username"]
|
||||||
|
}'::json);
|
||||||
|
jsonschema_is_valid
|
||||||
|
---------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Valid instance
|
||||||
|
SELECT jsonschema_validation_errors(
|
||||||
|
'{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"username": { "type": "string" },
|
||||||
|
"age": { "type": "integer" }
|
||||||
|
},
|
||||||
|
"required": ["username"]
|
||||||
|
}'::json,
|
||||||
|
'{"username": "alice", "age": 25}'::json
|
||||||
|
);
|
||||||
|
jsonschema_validation_errors
|
||||||
|
------------------------------
|
||||||
|
{}
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Invalid instance: missing required "username"
|
||||||
|
SELECT jsonschema_validation_errors(
|
||||||
|
'{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"username": { "type": "string" },
|
||||||
|
"age": { "type": "integer" }
|
||||||
|
},
|
||||||
|
"required": ["username"]
|
||||||
|
}'::json,
|
||||||
|
'{"age": 25}'::json
|
||||||
|
);
|
||||||
|
jsonschema_validation_errors
|
||||||
|
-----------------------------------------
|
||||||
|
{"\"username\" is a required property"}
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Invalid instance: wrong type for "age"
|
||||||
|
SELECT jsonschema_validation_errors(
|
||||||
|
'{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"username": { "type": "string" },
|
||||||
|
"age": { "type": "integer" }
|
||||||
|
},
|
||||||
|
"required": ["username"]
|
||||||
|
}'::json,
|
||||||
|
'{"username": "bob", "age": "twenty"}'::json
|
||||||
|
);
|
||||||
|
jsonschema_validation_errors
|
||||||
|
-------------------------------------------
|
||||||
|
{"\"twenty\" is not of type \"integer\""}
|
||||||
|
(1 row)
|
||||||
|
|
||||||
@@ -0,0 +1,66 @@
|
|||||||
|
-- Schema with enums, nulls, extra properties disallowed
|
||||||
|
SELECT jsonschema_is_valid('{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"status": { "type": "string", "enum": ["active", "inactive", "pending"] },
|
||||||
|
"email": { "type": ["string", "null"], "format": "email" }
|
||||||
|
},
|
||||||
|
"required": ["status"],
|
||||||
|
"additionalProperties": false
|
||||||
|
}'::json);
|
||||||
|
|
||||||
|
-- Valid enum and null email
|
||||||
|
SELECT jsonschema_validation_errors(
|
||||||
|
'{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"status": { "type": "string", "enum": ["active", "inactive", "pending"] },
|
||||||
|
"email": { "type": ["string", "null"], "format": "email" }
|
||||||
|
},
|
||||||
|
"required": ["status"],
|
||||||
|
"additionalProperties": false
|
||||||
|
}'::json,
|
||||||
|
'{"status": "active", "email": null}'::json
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Invalid enum value
|
||||||
|
SELECT jsonschema_validation_errors(
|
||||||
|
'{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"status": { "type": "string", "enum": ["active", "inactive", "pending"] },
|
||||||
|
"email": { "type": ["string", "null"], "format": "email" }
|
||||||
|
},
|
||||||
|
"required": ["status"],
|
||||||
|
"additionalProperties": false
|
||||||
|
}'::json,
|
||||||
|
'{"status": "disabled", "email": null}'::json
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Invalid email format (assuming format is validated)
|
||||||
|
SELECT jsonschema_validation_errors(
|
||||||
|
'{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"status": { "type": "string", "enum": ["active", "inactive", "pending"] },
|
||||||
|
"email": { "type": ["string", "null"], "format": "email" }
|
||||||
|
},
|
||||||
|
"required": ["status"],
|
||||||
|
"additionalProperties": false
|
||||||
|
}'::json,
|
||||||
|
'{"status": "active", "email": "not-an-email"}'::json
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Extra property not allowed
|
||||||
|
SELECT jsonschema_validation_errors(
|
||||||
|
'{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"status": { "type": "string", "enum": ["active", "inactive", "pending"] },
|
||||||
|
"email": { "type": ["string", "null"], "format": "email" }
|
||||||
|
},
|
||||||
|
"required": ["status"],
|
||||||
|
"additionalProperties": false
|
||||||
|
}'::json,
|
||||||
|
'{"status": "active", "extra": "should not be here"}'::json
|
||||||
|
);
|
||||||
@@ -0,0 +1,48 @@
|
|||||||
|
-- Define schema
|
||||||
|
SELECT jsonschema_is_valid('{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"username": { "type": "string" },
|
||||||
|
"age": { "type": "integer" }
|
||||||
|
},
|
||||||
|
"required": ["username"]
|
||||||
|
}'::json);
|
||||||
|
|
||||||
|
-- Valid instance
|
||||||
|
SELECT jsonschema_validation_errors(
|
||||||
|
'{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"username": { "type": "string" },
|
||||||
|
"age": { "type": "integer" }
|
||||||
|
},
|
||||||
|
"required": ["username"]
|
||||||
|
}'::json,
|
||||||
|
'{"username": "alice", "age": 25}'::json
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Invalid instance: missing required "username"
|
||||||
|
SELECT jsonschema_validation_errors(
|
||||||
|
'{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"username": { "type": "string" },
|
||||||
|
"age": { "type": "integer" }
|
||||||
|
},
|
||||||
|
"required": ["username"]
|
||||||
|
}'::json,
|
||||||
|
'{"age": 25}'::json
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Invalid instance: wrong type for "age"
|
||||||
|
SELECT jsonschema_validation_errors(
|
||||||
|
'{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"username": { "type": "string" },
|
||||||
|
"age": { "type": "integer" }
|
||||||
|
},
|
||||||
|
"required": ["username"]
|
||||||
|
}'::json,
|
||||||
|
'{"username": "bob", "age": "twenty"}'::json
|
||||||
|
);
|
||||||
9
docker-compose/ext-src/pg_session_jwt-src/Makefile
Normal file
9
docker-compose/ext-src/pg_session_jwt-src/Makefile
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
EXTENSION = pg_session_jwt
|
||||||
|
|
||||||
|
REGRESS = basic_functions
|
||||||
|
REGRESS_OPTS = --load-extension=$(EXTENSION)
|
||||||
|
export PGOPTIONS = -c pg_session_jwt.jwk={"crv":"Ed25519","kty":"OKP","x":"R_Abz-63zJ00l-IraL5fQhwkhGVZCSooQFV5ntC3C7M"}
|
||||||
|
|
||||||
|
PG_CONFIG ?= pg_config
|
||||||
|
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||||
|
include $(PGXS)
|
||||||
@@ -0,0 +1,35 @@
|
|||||||
|
-- Basic functionality tests for pg_session_jwt
|
||||||
|
-- Test auth.init() function
|
||||||
|
SELECT auth.init();
|
||||||
|
init
|
||||||
|
------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Test an invalid JWT
|
||||||
|
SELECT auth.jwt_session_init('INVALID-JWT');
|
||||||
|
ERROR: invalid JWT encoding
|
||||||
|
-- Test creating a session with an expired JWT
|
||||||
|
SELECT auth.jwt_session_init('eyJhbGciOiJFZERTQSJ9.eyJleHAiOjE3NDI1NjQ0MzIsImlhdCI6MTc0MjU2NDI1MiwianRpIjo0MjQyNDIsInN1YiI6InVzZXIxMjMifQ.A6FwKuaSduHB9O7Gz37g0uoD_U9qVS0JNtT7YABGVgB7HUD1AMFc9DeyhNntWBqncg8k5brv-hrNTuUh5JYMAw');
|
||||||
|
ERROR: Token used after it has expired
|
||||||
|
-- Test creating a session with a valid JWT
|
||||||
|
SELECT auth.jwt_session_init('eyJhbGciOiJFZERTQSJ9.eyJleHAiOjQ4OTYxNjQyNTIsImlhdCI6MTc0MjU2NDI1MiwianRpIjo0MzQzNDMsInN1YiI6InVzZXIxMjMifQ.2TXVgjb6JSUq6_adlvp-m_SdOxZSyGS30RS9TLB0xu2N83dMSs2NybwE1NMU8Fb0tcAZR_ET7M2rSxbTrphfCg');
|
||||||
|
jwt_session_init
|
||||||
|
------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Test auth.session() function
|
||||||
|
SELECT auth.session();
|
||||||
|
session
|
||||||
|
-------------------------------------------------------------------------
|
||||||
|
{"exp": 4896164252, "iat": 1742564252, "jti": 434343, "sub": "user123"}
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Test auth.user_id() function
|
||||||
|
SELECT auth.user_id() AS user_id;
|
||||||
|
user_id
|
||||||
|
---------
|
||||||
|
user123
|
||||||
|
(1 row)
|
||||||
|
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
-- Basic functionality tests for pg_session_jwt
|
||||||
|
|
||||||
|
-- Test auth.init() function
|
||||||
|
SELECT auth.init();
|
||||||
|
|
||||||
|
-- Test an invalid JWT
|
||||||
|
SELECT auth.jwt_session_init('INVALID-JWT');
|
||||||
|
|
||||||
|
-- Test creating a session with an expired JWT
|
||||||
|
SELECT auth.jwt_session_init('eyJhbGciOiJFZERTQSJ9.eyJleHAiOjE3NDI1NjQ0MzIsImlhdCI6MTc0MjU2NDI1MiwianRpIjo0MjQyNDIsInN1YiI6InVzZXIxMjMifQ.A6FwKuaSduHB9O7Gz37g0uoD_U9qVS0JNtT7YABGVgB7HUD1AMFc9DeyhNntWBqncg8k5brv-hrNTuUh5JYMAw');
|
||||||
|
|
||||||
|
-- Test creating a session with a valid JWT
|
||||||
|
SELECT auth.jwt_session_init('eyJhbGciOiJFZERTQSJ9.eyJleHAiOjQ4OTYxNjQyNTIsImlhdCI6MTc0MjU2NDI1MiwianRpIjo0MzQzNDMsInN1YiI6InVzZXIxMjMifQ.2TXVgjb6JSUq6_adlvp-m_SdOxZSyGS30RS9TLB0xu2N83dMSs2NybwE1NMU8Fb0tcAZR_ET7M2rSxbTrphfCg');
|
||||||
|
|
||||||
|
-- Test auth.session() function
|
||||||
|
SELECT auth.session();
|
||||||
|
|
||||||
|
-- Test auth.user_id() function
|
||||||
|
SELECT auth.user_id() AS user_id;
|
||||||
@@ -160,7 +160,7 @@ pub struct CatalogObjects {
|
|||||||
pub databases: Vec<Database>,
|
pub databases: Vec<Database>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
|
||||||
pub struct ComputeCtlConfig {
|
pub struct ComputeCtlConfig {
|
||||||
/// Set of JSON web keys that the compute can use to authenticate
|
/// Set of JSON web keys that the compute can use to authenticate
|
||||||
/// communication from the control plane.
|
/// communication from the control plane.
|
||||||
@@ -179,7 +179,7 @@ impl Default for ComputeCtlConfig {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
|
||||||
pub struct TlsConfig {
|
pub struct TlsConfig {
|
||||||
pub key_path: String,
|
pub key_path: String,
|
||||||
pub cert_path: String,
|
pub cert_path: String,
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ futures.workspace = true
|
|||||||
hyper0.workspace = true
|
hyper0.workspace = true
|
||||||
itertools.workspace = true
|
itertools.workspace = true
|
||||||
jemalloc_pprof.workspace = true
|
jemalloc_pprof.workspace = true
|
||||||
|
jsonwebtoken.workspace = true
|
||||||
once_cell.workspace = true
|
once_cell.workspace = true
|
||||||
pprof.workspace = true
|
pprof.workspace = true
|
||||||
regex.workspace = true
|
regex.workspace = true
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ use bytes::{Bytes, BytesMut};
|
|||||||
use hyper::header::{AUTHORIZATION, CONTENT_DISPOSITION, CONTENT_TYPE, HeaderName};
|
use hyper::header::{AUTHORIZATION, CONTENT_DISPOSITION, CONTENT_TYPE, HeaderName};
|
||||||
use hyper::http::HeaderValue;
|
use hyper::http::HeaderValue;
|
||||||
use hyper::{Body, Method, Request, Response};
|
use hyper::{Body, Method, Request, Response};
|
||||||
|
use jsonwebtoken::TokenData;
|
||||||
use metrics::{Encoder, IntCounter, TextEncoder, register_int_counter};
|
use metrics::{Encoder, IntCounter, TextEncoder, register_int_counter};
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use pprof::ProfilerGuardBuilder;
|
use pprof::ProfilerGuardBuilder;
|
||||||
@@ -618,7 +619,7 @@ pub fn auth_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>(
|
|||||||
})?;
|
})?;
|
||||||
let token = parse_token(header_value)?;
|
let token = parse_token(header_value)?;
|
||||||
|
|
||||||
let data = auth.decode(token).map_err(|err| {
|
let data: TokenData<Claims> = auth.decode(token).map_err(|err| {
|
||||||
warn!("Authentication error: {err}");
|
warn!("Authentication error: {err}");
|
||||||
// Rely on From<AuthError> for ApiError impl
|
// Rely on From<AuthError> for ApiError impl
|
||||||
err
|
err
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ nix = {workspace = true, optional = true}
|
|||||||
reqwest.workspace = true
|
reqwest.workspace = true
|
||||||
rand.workspace = true
|
rand.workspace = true
|
||||||
tracing-utils.workspace = true
|
tracing-utils.workspace = true
|
||||||
|
once_cell.workspace = true
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
bincode.workspace = true
|
bincode.workspace = true
|
||||||
|
|||||||
@@ -379,6 +379,8 @@ pub struct TenantConfigToml {
|
|||||||
/// size exceeds `compaction_upper_limit * checkpoint_distance`.
|
/// size exceeds `compaction_upper_limit * checkpoint_distance`.
|
||||||
pub compaction_upper_limit: usize,
|
pub compaction_upper_limit: usize,
|
||||||
pub compaction_algorithm: crate::models::CompactionAlgorithmSettings,
|
pub compaction_algorithm: crate::models::CompactionAlgorithmSettings,
|
||||||
|
/// If true, enable shard ancestor compaction (enabled by default).
|
||||||
|
pub compaction_shard_ancestor: bool,
|
||||||
/// If true, compact down L0 across all tenant timelines before doing regular compaction. L0
|
/// If true, compact down L0 across all tenant timelines before doing regular compaction. L0
|
||||||
/// compaction must be responsive to avoid read amp during heavy ingestion. Defaults to true.
|
/// compaction must be responsive to avoid read amp during heavy ingestion. Defaults to true.
|
||||||
pub compaction_l0_first: bool,
|
pub compaction_l0_first: bool,
|
||||||
@@ -677,6 +679,7 @@ pub mod tenant_conf_defaults {
|
|||||||
|
|
||||||
pub const DEFAULT_COMPACTION_PERIOD: &str = "20 s";
|
pub const DEFAULT_COMPACTION_PERIOD: &str = "20 s";
|
||||||
pub const DEFAULT_COMPACTION_THRESHOLD: usize = 10;
|
pub const DEFAULT_COMPACTION_THRESHOLD: usize = 10;
|
||||||
|
pub const DEFAULT_COMPACTION_SHARD_ANCESTOR: bool = true;
|
||||||
|
|
||||||
// This value needs to be tuned to avoid OOM. We have 3/4*CPUs threads for L0 compaction, that's
|
// This value needs to be tuned to avoid OOM. We have 3/4*CPUs threads for L0 compaction, that's
|
||||||
// 3/4*16=9 on most of our pageservers. Compacting 20 layers requires about 1 GB memory (could
|
// 3/4*16=9 on most of our pageservers. Compacting 20 layers requires about 1 GB memory (could
|
||||||
@@ -734,6 +737,7 @@ impl Default for TenantConfigToml {
|
|||||||
compaction_algorithm: crate::models::CompactionAlgorithmSettings {
|
compaction_algorithm: crate::models::CompactionAlgorithmSettings {
|
||||||
kind: DEFAULT_COMPACTION_ALGORITHM,
|
kind: DEFAULT_COMPACTION_ALGORITHM,
|
||||||
},
|
},
|
||||||
|
compaction_shard_ancestor: DEFAULT_COMPACTION_SHARD_ANCESTOR,
|
||||||
compaction_l0_first: DEFAULT_COMPACTION_L0_FIRST,
|
compaction_l0_first: DEFAULT_COMPACTION_L0_FIRST,
|
||||||
compaction_l0_semaphore: DEFAULT_COMPACTION_L0_SEMAPHORE,
|
compaction_l0_semaphore: DEFAULT_COMPACTION_L0_SEMAPHORE,
|
||||||
l0_flush_delay_threshold: None,
|
l0_flush_delay_threshold: None,
|
||||||
|
|||||||
@@ -526,6 +526,8 @@ pub struct TenantConfigPatch {
|
|||||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||||
pub compaction_algorithm: FieldPatch<CompactionAlgorithmSettings>,
|
pub compaction_algorithm: FieldPatch<CompactionAlgorithmSettings>,
|
||||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||||
|
pub compaction_shard_ancestor: FieldPatch<bool>,
|
||||||
|
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||||
pub compaction_l0_first: FieldPatch<bool>,
|
pub compaction_l0_first: FieldPatch<bool>,
|
||||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||||
pub compaction_l0_semaphore: FieldPatch<bool>,
|
pub compaction_l0_semaphore: FieldPatch<bool>,
|
||||||
@@ -615,6 +617,9 @@ pub struct TenantConfig {
|
|||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub compaction_algorithm: Option<CompactionAlgorithmSettings>,
|
pub compaction_algorithm: Option<CompactionAlgorithmSettings>,
|
||||||
|
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub compaction_shard_ancestor: Option<bool>,
|
||||||
|
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub compaction_l0_first: Option<bool>,
|
pub compaction_l0_first: Option<bool>,
|
||||||
|
|
||||||
@@ -724,6 +729,7 @@ impl TenantConfig {
|
|||||||
mut compaction_threshold,
|
mut compaction_threshold,
|
||||||
mut compaction_upper_limit,
|
mut compaction_upper_limit,
|
||||||
mut compaction_algorithm,
|
mut compaction_algorithm,
|
||||||
|
mut compaction_shard_ancestor,
|
||||||
mut compaction_l0_first,
|
mut compaction_l0_first,
|
||||||
mut compaction_l0_semaphore,
|
mut compaction_l0_semaphore,
|
||||||
mut l0_flush_delay_threshold,
|
mut l0_flush_delay_threshold,
|
||||||
@@ -772,6 +778,9 @@ impl TenantConfig {
|
|||||||
.compaction_upper_limit
|
.compaction_upper_limit
|
||||||
.apply(&mut compaction_upper_limit);
|
.apply(&mut compaction_upper_limit);
|
||||||
patch.compaction_algorithm.apply(&mut compaction_algorithm);
|
patch.compaction_algorithm.apply(&mut compaction_algorithm);
|
||||||
|
patch
|
||||||
|
.compaction_shard_ancestor
|
||||||
|
.apply(&mut compaction_shard_ancestor);
|
||||||
patch.compaction_l0_first.apply(&mut compaction_l0_first);
|
patch.compaction_l0_first.apply(&mut compaction_l0_first);
|
||||||
patch
|
patch
|
||||||
.compaction_l0_semaphore
|
.compaction_l0_semaphore
|
||||||
@@ -860,6 +869,7 @@ impl TenantConfig {
|
|||||||
compaction_threshold,
|
compaction_threshold,
|
||||||
compaction_upper_limit,
|
compaction_upper_limit,
|
||||||
compaction_algorithm,
|
compaction_algorithm,
|
||||||
|
compaction_shard_ancestor,
|
||||||
compaction_l0_first,
|
compaction_l0_first,
|
||||||
compaction_l0_semaphore,
|
compaction_l0_semaphore,
|
||||||
l0_flush_delay_threshold,
|
l0_flush_delay_threshold,
|
||||||
@@ -920,6 +930,9 @@ impl TenantConfig {
|
|||||||
.as_ref()
|
.as_ref()
|
||||||
.unwrap_or(&global_conf.compaction_algorithm)
|
.unwrap_or(&global_conf.compaction_algorithm)
|
||||||
.clone(),
|
.clone(),
|
||||||
|
compaction_shard_ancestor: self
|
||||||
|
.compaction_shard_ancestor
|
||||||
|
.unwrap_or(global_conf.compaction_shard_ancestor),
|
||||||
compaction_l0_first: self
|
compaction_l0_first: self
|
||||||
.compaction_l0_first
|
.compaction_l0_first
|
||||||
.unwrap_or(global_conf.compaction_l0_first),
|
.unwrap_or(global_conf.compaction_l0_first),
|
||||||
@@ -1804,8 +1817,34 @@ pub mod virtual_file {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl IoMode {
|
impl IoMode {
|
||||||
pub const fn preferred() -> Self {
|
pub fn preferred() -> Self {
|
||||||
Self::Buffered
|
// The default behavior when running Rust unit tests without any further
|
||||||
|
// flags is to use the newest behavior if available on the platform (Direct).
|
||||||
|
// The CI uses the following environment variable to run unit tests for all
|
||||||
|
// different modes.
|
||||||
|
// NB: the Python regression & perf tests have their own defaults management
|
||||||
|
// that writes pageserver.toml; they do not use this variable.
|
||||||
|
if cfg!(test) {
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
|
static CACHED: Lazy<IoMode> = Lazy::new(|| {
|
||||||
|
utils::env::var_serde_json_string(
|
||||||
|
"NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IO_MODE",
|
||||||
|
)
|
||||||
|
.unwrap_or({
|
||||||
|
#[cfg(target_os = "linux")]
|
||||||
|
{
|
||||||
|
IoMode::Direct
|
||||||
|
}
|
||||||
|
#[cfg(not(target_os = "linux"))]
|
||||||
|
{
|
||||||
|
IoMode::Buffered
|
||||||
|
}
|
||||||
|
})
|
||||||
|
});
|
||||||
|
*CACHED
|
||||||
|
} else {
|
||||||
|
IoMode::Buffered
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ futures = { workspace = true }
|
|||||||
jsonwebtoken.workspace = true
|
jsonwebtoken.workspace = true
|
||||||
nix = { workspace = true, features = ["ioctl"] }
|
nix = { workspace = true, features = ["ioctl"] }
|
||||||
once_cell.workspace = true
|
once_cell.workspace = true
|
||||||
|
pem.workspace = true
|
||||||
pin-project-lite.workspace = true
|
pin-project-lite.workspace = true
|
||||||
regex.workspace = true
|
regex.workspace = true
|
||||||
serde.workspace = true
|
serde.workspace = true
|
||||||
|
|||||||
@@ -11,7 +11,8 @@ use camino::Utf8Path;
|
|||||||
use jsonwebtoken::{
|
use jsonwebtoken::{
|
||||||
Algorithm, DecodingKey, EncodingKey, Header, TokenData, Validation, decode, encode,
|
Algorithm, DecodingKey, EncodingKey, Header, TokenData, Validation, decode, encode,
|
||||||
};
|
};
|
||||||
use serde::{Deserialize, Serialize};
|
use pem::Pem;
|
||||||
|
use serde::{Deserialize, Serialize, de::DeserializeOwned};
|
||||||
|
|
||||||
use crate::id::TenantId;
|
use crate::id::TenantId;
|
||||||
|
|
||||||
@@ -73,7 +74,10 @@ impl SwappableJwtAuth {
|
|||||||
pub fn swap(&self, jwt_auth: JwtAuth) {
|
pub fn swap(&self, jwt_auth: JwtAuth) {
|
||||||
self.0.swap(Arc::new(jwt_auth));
|
self.0.swap(Arc::new(jwt_auth));
|
||||||
}
|
}
|
||||||
pub fn decode(&self, token: &str) -> std::result::Result<TokenData<Claims>, AuthError> {
|
pub fn decode<D: DeserializeOwned>(
|
||||||
|
&self,
|
||||||
|
token: &str,
|
||||||
|
) -> std::result::Result<TokenData<D>, AuthError> {
|
||||||
self.0.load().decode(token)
|
self.0.load().decode(token)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -148,7 +152,10 @@ impl JwtAuth {
|
|||||||
/// The function tries the stored decoding keys in succession,
|
/// The function tries the stored decoding keys in succession,
|
||||||
/// and returns the first yielding a successful result.
|
/// and returns the first yielding a successful result.
|
||||||
/// If there is no working decoding key, it returns the last error.
|
/// If there is no working decoding key, it returns the last error.
|
||||||
pub fn decode(&self, token: &str) -> std::result::Result<TokenData<Claims>, AuthError> {
|
pub fn decode<D: DeserializeOwned>(
|
||||||
|
&self,
|
||||||
|
token: &str,
|
||||||
|
) -> std::result::Result<TokenData<D>, AuthError> {
|
||||||
let mut res = None;
|
let mut res = None;
|
||||||
for decoding_key in &self.decoding_keys {
|
for decoding_key in &self.decoding_keys {
|
||||||
res = Some(decode(token, decoding_key, &self.validation));
|
res = Some(decode(token, decoding_key, &self.validation));
|
||||||
@@ -173,8 +180,8 @@ impl std::fmt::Debug for JwtAuth {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// this function is used only for testing purposes in CLI, e.g. to generate tokens during init
|
// this function is used only for testing purposes in CLI, e.g. to generate tokens during init
|
||||||
pub fn encode_from_key_file<S: Serialize>(claims: &S, key_data: &[u8]) -> Result<String> {
|
pub fn encode_from_key_file<S: Serialize>(claims: &S, pem: &Pem) -> Result<String> {
|
||||||
let key = EncodingKey::from_ed_pem(key_data)?;
|
let key = EncodingKey::from_ed_der(pem.contents());
|
||||||
Ok(encode(&Header::new(STORAGE_TOKEN_ALGORITHM), claims, &key)?)
|
Ok(encode(&Header::new(STORAGE_TOKEN_ALGORITHM), claims, &key)?)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -188,13 +195,13 @@ mod tests {
|
|||||||
//
|
//
|
||||||
// openssl genpkey -algorithm ed25519 -out ed25519-priv.pem
|
// openssl genpkey -algorithm ed25519 -out ed25519-priv.pem
|
||||||
// openssl pkey -in ed25519-priv.pem -pubout -out ed25519-pub.pem
|
// openssl pkey -in ed25519-priv.pem -pubout -out ed25519-pub.pem
|
||||||
const TEST_PUB_KEY_ED25519: &[u8] = br#"
|
const TEST_PUB_KEY_ED25519: &str = r#"
|
||||||
-----BEGIN PUBLIC KEY-----
|
-----BEGIN PUBLIC KEY-----
|
||||||
MCowBQYDK2VwAyEARYwaNBayR+eGI0iXB4s3QxE3Nl2g1iWbr6KtLWeVD/w=
|
MCowBQYDK2VwAyEARYwaNBayR+eGI0iXB4s3QxE3Nl2g1iWbr6KtLWeVD/w=
|
||||||
-----END PUBLIC KEY-----
|
-----END PUBLIC KEY-----
|
||||||
"#;
|
"#;
|
||||||
|
|
||||||
const TEST_PRIV_KEY_ED25519: &[u8] = br#"
|
const TEST_PRIV_KEY_ED25519: &str = r#"
|
||||||
-----BEGIN PRIVATE KEY-----
|
-----BEGIN PRIVATE KEY-----
|
||||||
MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
|
MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
|
||||||
-----END PRIVATE KEY-----
|
-----END PRIVATE KEY-----
|
||||||
@@ -222,9 +229,9 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
|
|||||||
|
|
||||||
// Check it can be validated with the public key
|
// Check it can be validated with the public key
|
||||||
let auth = JwtAuth::new(vec![
|
let auth = JwtAuth::new(vec![
|
||||||
DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519).unwrap(),
|
DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519.as_bytes()).unwrap(),
|
||||||
]);
|
]);
|
||||||
let claims_from_token = auth.decode(encoded_eddsa).unwrap().claims;
|
let claims_from_token: Claims = auth.decode(encoded_eddsa).unwrap().claims;
|
||||||
assert_eq!(claims_from_token, expected_claims);
|
assert_eq!(claims_from_token, expected_claims);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -235,13 +242,14 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
|
|||||||
scope: Scope::Tenant,
|
scope: Scope::Tenant,
|
||||||
};
|
};
|
||||||
|
|
||||||
let encoded = encode_from_key_file(&claims, TEST_PRIV_KEY_ED25519).unwrap();
|
let pem = pem::parse(TEST_PRIV_KEY_ED25519).unwrap();
|
||||||
|
let encoded = encode_from_key_file(&claims, &pem).unwrap();
|
||||||
|
|
||||||
// decode it back
|
// decode it back
|
||||||
let auth = JwtAuth::new(vec![
|
let auth = JwtAuth::new(vec![
|
||||||
DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519).unwrap(),
|
DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519.as_bytes()).unwrap(),
|
||||||
]);
|
]);
|
||||||
let decoded = auth.decode(&encoded).unwrap();
|
let decoded: TokenData<Claims> = auth.decode(&encoded).unwrap();
|
||||||
|
|
||||||
assert_eq!(decoded.claims, claims);
|
assert_eq!(decoded.claims, claims);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ humantime.workspace = true
|
|||||||
humantime-serde.workspace = true
|
humantime-serde.workspace = true
|
||||||
hyper0.workspace = true
|
hyper0.workspace = true
|
||||||
itertools.workspace = true
|
itertools.workspace = true
|
||||||
|
jsonwebtoken.workspace = true
|
||||||
md5.workspace = true
|
md5.workspace = true
|
||||||
nix.workspace = true
|
nix.workspace = true
|
||||||
# hack to get the number of worker threads tokio uses
|
# hack to get the number of worker threads tokio uses
|
||||||
|
|||||||
@@ -118,13 +118,13 @@ pub struct PageServerConf {
|
|||||||
/// A lower value implicitly deprioritizes loading such tenants, vs. other work in the system.
|
/// A lower value implicitly deprioritizes loading such tenants, vs. other work in the system.
|
||||||
pub concurrent_tenant_warmup: ConfigurableSemaphore,
|
pub concurrent_tenant_warmup: ConfigurableSemaphore,
|
||||||
|
|
||||||
/// Number of concurrent [`Tenant::gather_size_inputs`](crate::tenant::Tenant::gather_size_inputs) allowed.
|
/// Number of concurrent [`TenantShard::gather_size_inputs`](crate::tenant::TenantShard::gather_size_inputs) allowed.
|
||||||
pub concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore,
|
pub concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore,
|
||||||
/// Limit of concurrent [`Tenant::gather_size_inputs`] issued by module `eviction_task`.
|
/// Limit of concurrent [`TenantShard::gather_size_inputs`] issued by module `eviction_task`.
|
||||||
/// The number of permits is the same as `concurrent_tenant_size_logical_size_queries`.
|
/// The number of permits is the same as `concurrent_tenant_size_logical_size_queries`.
|
||||||
/// See the comment in `eviction_task` for details.
|
/// See the comment in `eviction_task` for details.
|
||||||
///
|
///
|
||||||
/// [`Tenant::gather_size_inputs`]: crate::tenant::Tenant::gather_size_inputs
|
/// [`TenantShard::gather_size_inputs`]: crate::tenant::TenantShard::gather_size_inputs
|
||||||
pub eviction_task_immitated_concurrent_logical_size_queries: ConfigurableSemaphore,
|
pub eviction_task_immitated_concurrent_logical_size_queries: ConfigurableSemaphore,
|
||||||
|
|
||||||
// How often to collect metrics and send them to the metrics endpoint.
|
// How often to collect metrics and send them to the metrics endpoint.
|
||||||
@@ -588,10 +588,10 @@ impl ConfigurableSemaphore {
|
|||||||
/// Initialize using a non-zero amount of permits.
|
/// Initialize using a non-zero amount of permits.
|
||||||
///
|
///
|
||||||
/// Require a non-zero initial permits, because using permits == 0 is a crude way to disable a
|
/// Require a non-zero initial permits, because using permits == 0 is a crude way to disable a
|
||||||
/// feature such as [`Tenant::gather_size_inputs`]. Otherwise any semaphore using future will
|
/// feature such as [`TenantShard::gather_size_inputs`]. Otherwise any semaphore using future will
|
||||||
/// behave like [`futures::future::pending`], just waiting until new permits are added.
|
/// behave like [`futures::future::pending`], just waiting until new permits are added.
|
||||||
///
|
///
|
||||||
/// [`Tenant::gather_size_inputs`]: crate::tenant::Tenant::gather_size_inputs
|
/// [`TenantShard::gather_size_inputs`]: crate::tenant::TenantShard::gather_size_inputs
|
||||||
pub fn new(initial_permits: NonZeroUsize) -> Self {
|
pub fn new(initial_permits: NonZeroUsize) -> Self {
|
||||||
ConfigurableSemaphore {
|
ConfigurableSemaphore {
|
||||||
initial_permits,
|
initial_permits,
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ use crate::task_mgr::{self, BACKGROUND_RUNTIME, TaskKind};
|
|||||||
use crate::tenant::mgr::TenantManager;
|
use crate::tenant::mgr::TenantManager;
|
||||||
use crate::tenant::size::CalculateSyntheticSizeError;
|
use crate::tenant::size::CalculateSyntheticSizeError;
|
||||||
use crate::tenant::tasks::BackgroundLoopKind;
|
use crate::tenant::tasks::BackgroundLoopKind;
|
||||||
use crate::tenant::{LogicalSizeCalculationCause, Tenant};
|
use crate::tenant::{LogicalSizeCalculationCause, TenantShard};
|
||||||
|
|
||||||
mod disk_cache;
|
mod disk_cache;
|
||||||
mod metrics;
|
mod metrics;
|
||||||
@@ -428,7 +428,7 @@ async fn calculate_synthetic_size_worker(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn calculate_and_log(tenant: &Tenant, cancel: &CancellationToken, ctx: &RequestContext) {
|
async fn calculate_and_log(tenant: &TenantShard, cancel: &CancellationToken, ctx: &RequestContext) {
|
||||||
const CAUSE: LogicalSizeCalculationCause =
|
const CAUSE: LogicalSizeCalculationCause =
|
||||||
LogicalSizeCalculationCause::ConsumptionMetricsSyntheticSize;
|
LogicalSizeCalculationCause::ConsumptionMetricsSyntheticSize;
|
||||||
|
|
||||||
|
|||||||
@@ -175,9 +175,9 @@ impl MetricsKey {
|
|||||||
.absolute_values()
|
.absolute_values()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// [`Tenant::remote_size`]
|
/// [`TenantShard::remote_size`]
|
||||||
///
|
///
|
||||||
/// [`Tenant::remote_size`]: crate::tenant::Tenant::remote_size
|
/// [`TenantShard::remote_size`]: crate::tenant::TenantShard::remote_size
|
||||||
const fn remote_storage_size(tenant_id: TenantId) -> AbsoluteValueFactory {
|
const fn remote_storage_size(tenant_id: TenantId) -> AbsoluteValueFactory {
|
||||||
MetricsKey {
|
MetricsKey {
|
||||||
tenant_id,
|
tenant_id,
|
||||||
@@ -199,9 +199,9 @@ impl MetricsKey {
|
|||||||
.absolute_values()
|
.absolute_values()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// [`Tenant::cached_synthetic_size`] as refreshed by [`calculate_synthetic_size_worker`].
|
/// [`TenantShard::cached_synthetic_size`] as refreshed by [`calculate_synthetic_size_worker`].
|
||||||
///
|
///
|
||||||
/// [`Tenant::cached_synthetic_size`]: crate::tenant::Tenant::cached_synthetic_size
|
/// [`TenantShard::cached_synthetic_size`]: crate::tenant::TenantShard::cached_synthetic_size
|
||||||
/// [`calculate_synthetic_size_worker`]: super::calculate_synthetic_size_worker
|
/// [`calculate_synthetic_size_worker`]: super::calculate_synthetic_size_worker
|
||||||
const fn synthetic_size(tenant_id: TenantId) -> AbsoluteValueFactory {
|
const fn synthetic_size(tenant_id: TenantId) -> AbsoluteValueFactory {
|
||||||
MetricsKey {
|
MetricsKey {
|
||||||
@@ -254,7 +254,7 @@ pub(super) async fn collect_all_metrics(
|
|||||||
|
|
||||||
async fn collect<S>(tenants: S, cache: &Cache, ctx: &RequestContext) -> Vec<NewRawMetric>
|
async fn collect<S>(tenants: S, cache: &Cache, ctx: &RequestContext) -> Vec<NewRawMetric>
|
||||||
where
|
where
|
||||||
S: futures::stream::Stream<Item = (TenantId, Arc<crate::tenant::Tenant>)>,
|
S: futures::stream::Stream<Item = (TenantId, Arc<crate::tenant::TenantShard>)>,
|
||||||
{
|
{
|
||||||
let mut current_metrics: Vec<NewRawMetric> = Vec::new();
|
let mut current_metrics: Vec<NewRawMetric> = Vec::new();
|
||||||
|
|
||||||
@@ -308,7 +308,7 @@ impl TenantSnapshot {
|
|||||||
///
|
///
|
||||||
/// `resident_size` is calculated of the timelines we had access to for other metrics, so we
|
/// `resident_size` is calculated of the timelines we had access to for other metrics, so we
|
||||||
/// cannot just list timelines here.
|
/// cannot just list timelines here.
|
||||||
fn collect(t: &Arc<crate::tenant::Tenant>, resident_size: u64) -> Self {
|
fn collect(t: &Arc<crate::tenant::TenantShard>, resident_size: u64) -> Self {
|
||||||
TenantSnapshot {
|
TenantSnapshot {
|
||||||
resident_size,
|
resident_size,
|
||||||
remote_size: t.remote_size(),
|
remote_size: t.remote_size(),
|
||||||
|
|||||||
@@ -1873,7 +1873,7 @@ async fn update_tenant_config_handler(
|
|||||||
&ShardParameters::default(),
|
&ShardParameters::default(),
|
||||||
);
|
);
|
||||||
|
|
||||||
crate::tenant::Tenant::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
|
crate::tenant::TenantShard::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
|
||||||
.await
|
.await
|
||||||
.map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;
|
.map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;
|
||||||
|
|
||||||
@@ -1917,7 +1917,7 @@ async fn patch_tenant_config_handler(
|
|||||||
&ShardParameters::default(),
|
&ShardParameters::default(),
|
||||||
);
|
);
|
||||||
|
|
||||||
crate::tenant::Tenant::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
|
crate::tenant::TenantShard::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
|
||||||
.await
|
.await
|
||||||
.map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;
|
.map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;
|
||||||
|
|
||||||
|
|||||||
@@ -49,7 +49,7 @@ use tracing::{info, info_span};
|
|||||||
/// backwards-compatible changes to the metadata format.
|
/// backwards-compatible changes to the metadata format.
|
||||||
pub const STORAGE_FORMAT_VERSION: u16 = 3;
|
pub const STORAGE_FORMAT_VERSION: u16 = 3;
|
||||||
|
|
||||||
pub const DEFAULT_PG_VERSION: u32 = 16;
|
pub const DEFAULT_PG_VERSION: u32 = 17;
|
||||||
|
|
||||||
// Magic constants used to identify different kinds of files
|
// Magic constants used to identify different kinds of files
|
||||||
pub const IMAGE_FILE_MAGIC: u16 = 0x5A60;
|
pub const IMAGE_FILE_MAGIC: u16 = 0x5A60;
|
||||||
|
|||||||
@@ -1086,7 +1086,7 @@ pub(crate) static TIMELINE_EPHEMERAL_BYTES: Lazy<UIntGauge> = Lazy::new(|| {
|
|||||||
.expect("Failed to register metric")
|
.expect("Failed to register metric")
|
||||||
});
|
});
|
||||||
|
|
||||||
/// Metrics related to the lifecycle of a [`crate::tenant::Tenant`] object: things
|
/// Metrics related to the lifecycle of a [`crate::tenant::TenantShard`] object: things
|
||||||
/// like how long it took to load.
|
/// like how long it took to load.
|
||||||
///
|
///
|
||||||
/// Note that these are process-global metrics, _not_ per-tenant metrics. Per-tenant
|
/// Note that these are process-global metrics, _not_ per-tenant metrics. Per-tenant
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ use async_compression::tokio::write::GzipEncoder;
|
|||||||
use bytes::Buf;
|
use bytes::Buf;
|
||||||
use futures::FutureExt;
|
use futures::FutureExt;
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
|
use jsonwebtoken::TokenData;
|
||||||
use once_cell::sync::OnceCell;
|
use once_cell::sync::OnceCell;
|
||||||
use pageserver_api::config::{
|
use pageserver_api::config::{
|
||||||
PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
|
PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
|
||||||
@@ -75,7 +76,7 @@ use crate::tenant::timeline::{self, WaitLsnError};
|
|||||||
use crate::tenant::{GetTimelineError, PageReconstructError, Timeline};
|
use crate::tenant::{GetTimelineError, PageReconstructError, Timeline};
|
||||||
use crate::{basebackup, timed_after_cancellation};
|
use crate::{basebackup, timed_after_cancellation};
|
||||||
|
|
||||||
/// How long we may wait for a [`crate::tenant::mgr::TenantSlot::InProgress`] and/or a [`crate::tenant::Tenant`] which
|
/// How long we may wait for a [`crate::tenant::mgr::TenantSlot::InProgress`] and/or a [`crate::tenant::TenantShard`] which
|
||||||
/// is not yet in state [`TenantState::Active`].
|
/// is not yet in state [`TenantState::Active`].
|
||||||
///
|
///
|
||||||
/// NB: this is a different value than [`crate::http::routes::ACTIVE_TENANT_TIMEOUT`].
|
/// NB: this is a different value than [`crate::http::routes::ACTIVE_TENANT_TIMEOUT`].
|
||||||
@@ -2837,7 +2838,7 @@ where
|
|||||||
) -> Result<(), QueryError> {
|
) -> Result<(), QueryError> {
|
||||||
// this unwrap is never triggered, because check_auth_jwt only called when auth_type is NeonJWT
|
// this unwrap is never triggered, because check_auth_jwt only called when auth_type is NeonJWT
|
||||||
// which requires auth to be present
|
// which requires auth to be present
|
||||||
let data = self
|
let data: TokenData<Claims> = self
|
||||||
.auth
|
.auth
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
|
|||||||
@@ -158,7 +158,7 @@ pub struct TenantSharedResources {
|
|||||||
pub l0_flush_global_state: L0FlushGlobalState,
|
pub l0_flush_global_state: L0FlushGlobalState,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A [`Tenant`] is really an _attached_ tenant. The configuration
|
/// A [`TenantShard`] is really an _attached_ tenant. The configuration
|
||||||
/// for an attached tenant is a subset of the [`LocationConf`], represented
|
/// for an attached tenant is a subset of the [`LocationConf`], represented
|
||||||
/// in this struct.
|
/// in this struct.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
@@ -245,7 +245,7 @@ pub(crate) enum SpawnMode {
|
|||||||
///
|
///
|
||||||
/// Tenant consists of multiple timelines. Keep them in a hash table.
|
/// Tenant consists of multiple timelines. Keep them in a hash table.
|
||||||
///
|
///
|
||||||
pub struct Tenant {
|
pub struct TenantShard {
|
||||||
// Global pageserver config parameters
|
// Global pageserver config parameters
|
||||||
pub conf: &'static PageServerConf,
|
pub conf: &'static PageServerConf,
|
||||||
|
|
||||||
@@ -267,7 +267,7 @@ pub struct Tenant {
|
|||||||
shard_identity: ShardIdentity,
|
shard_identity: ShardIdentity,
|
||||||
|
|
||||||
/// The remote storage generation, used to protect S3 objects from split-brain.
|
/// The remote storage generation, used to protect S3 objects from split-brain.
|
||||||
/// Does not change over the lifetime of the [`Tenant`] object.
|
/// Does not change over the lifetime of the [`TenantShard`] object.
|
||||||
///
|
///
|
||||||
/// This duplicates the generation stored in LocationConf, but that structure is mutable:
|
/// This duplicates the generation stored in LocationConf, but that structure is mutable:
|
||||||
/// this copy enforces the invariant that generatio doesn't change during a Tenant's lifetime.
|
/// this copy enforces the invariant that generatio doesn't change during a Tenant's lifetime.
|
||||||
@@ -309,7 +309,7 @@ pub struct Tenant {
|
|||||||
// Access to global deletion queue for when this tenant wants to schedule a deletion
|
// Access to global deletion queue for when this tenant wants to schedule a deletion
|
||||||
deletion_queue_client: DeletionQueueClient,
|
deletion_queue_client: DeletionQueueClient,
|
||||||
|
|
||||||
/// Cached logical sizes updated updated on each [`Tenant::gather_size_inputs`].
|
/// Cached logical sizes updated updated on each [`TenantShard::gather_size_inputs`].
|
||||||
cached_logical_sizes: tokio::sync::Mutex<HashMap<(TimelineId, Lsn), u64>>,
|
cached_logical_sizes: tokio::sync::Mutex<HashMap<(TimelineId, Lsn), u64>>,
|
||||||
cached_synthetic_tenant_size: Arc<AtomicU64>,
|
cached_synthetic_tenant_size: Arc<AtomicU64>,
|
||||||
|
|
||||||
@@ -337,12 +337,12 @@ pub struct Tenant {
|
|||||||
// Timelines' cancellation token.
|
// Timelines' cancellation token.
|
||||||
pub(crate) cancel: CancellationToken,
|
pub(crate) cancel: CancellationToken,
|
||||||
|
|
||||||
// Users of the Tenant such as the page service must take this Gate to avoid
|
// Users of the TenantShard such as the page service must take this Gate to avoid
|
||||||
// trying to use a Tenant which is shutting down.
|
// trying to use a TenantShard which is shutting down.
|
||||||
pub(crate) gate: Gate,
|
pub(crate) gate: Gate,
|
||||||
|
|
||||||
/// Throttle applied at the top of [`Timeline::get`].
|
/// Throttle applied at the top of [`Timeline::get`].
|
||||||
/// All [`Tenant::timelines`] of a given [`Tenant`] instance share the same [`throttle::Throttle`] instance.
|
/// All [`TenantShard::timelines`] of a given [`TenantShard`] instance share the same [`throttle::Throttle`] instance.
|
||||||
pub(crate) pagestream_throttle: Arc<throttle::Throttle>,
|
pub(crate) pagestream_throttle: Arc<throttle::Throttle>,
|
||||||
|
|
||||||
pub(crate) pagestream_throttle_metrics: Arc<crate::metrics::tenant_throttling::Pagestream>,
|
pub(crate) pagestream_throttle_metrics: Arc<crate::metrics::tenant_throttling::Pagestream>,
|
||||||
@@ -362,7 +362,7 @@ pub struct Tenant {
|
|||||||
|
|
||||||
l0_flush_global_state: L0FlushGlobalState,
|
l0_flush_global_state: L0FlushGlobalState,
|
||||||
}
|
}
|
||||||
impl std::fmt::Debug for Tenant {
|
impl std::fmt::Debug for TenantShard {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
write!(f, "{} ({})", self.tenant_shard_id, self.current_state())
|
write!(f, "{} ({})", self.tenant_shard_id, self.current_state())
|
||||||
}
|
}
|
||||||
@@ -841,7 +841,7 @@ impl Debug for SetStoppingError {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Arguments to [`Tenant::create_timeline`].
|
/// Arguments to [`TenantShard::create_timeline`].
|
||||||
///
|
///
|
||||||
/// Not usable as an idempotency key for timeline creation because if [`CreateTimelineParamsBranch::ancestor_start_lsn`]
|
/// Not usable as an idempotency key for timeline creation because if [`CreateTimelineParamsBranch::ancestor_start_lsn`]
|
||||||
/// is `None`, the result of the timeline create call is not deterministic.
|
/// is `None`, the result of the timeline create call is not deterministic.
|
||||||
@@ -876,7 +876,7 @@ pub(crate) struct CreateTimelineParamsImportPgdata {
|
|||||||
pub(crate) idempotency_key: import_pgdata::index_part_format::IdempotencyKey,
|
pub(crate) idempotency_key: import_pgdata::index_part_format::IdempotencyKey,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// What is used to determine idempotency of a [`Tenant::create_timeline`] call in [`Tenant::start_creating_timeline`] in [`Tenant::start_creating_timeline`].
|
/// What is used to determine idempotency of a [`TenantShard::create_timeline`] call in [`TenantShard::start_creating_timeline`] in [`TenantShard::start_creating_timeline`].
|
||||||
///
|
///
|
||||||
/// Each [`Timeline`] object holds [`Self`] as an immutable property in [`Timeline::create_idempotency`].
|
/// Each [`Timeline`] object holds [`Self`] as an immutable property in [`Timeline::create_idempotency`].
|
||||||
///
|
///
|
||||||
@@ -914,7 +914,7 @@ pub(crate) struct CreatingTimelineIdempotencyImportPgdata {
|
|||||||
idempotency_key: import_pgdata::index_part_format::IdempotencyKey,
|
idempotency_key: import_pgdata::index_part_format::IdempotencyKey,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// What is returned by [`Tenant::start_creating_timeline`].
|
/// What is returned by [`TenantShard::start_creating_timeline`].
|
||||||
#[must_use]
|
#[must_use]
|
||||||
enum StartCreatingTimelineResult {
|
enum StartCreatingTimelineResult {
|
||||||
CreateGuard(TimelineCreateGuard),
|
CreateGuard(TimelineCreateGuard),
|
||||||
@@ -943,13 +943,13 @@ struct TimelineInitAndSyncNeedsSpawnImportPgdata {
|
|||||||
guard: TimelineCreateGuard,
|
guard: TimelineCreateGuard,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// What is returned by [`Tenant::create_timeline`].
|
/// What is returned by [`TenantShard::create_timeline`].
|
||||||
enum CreateTimelineResult {
|
enum CreateTimelineResult {
|
||||||
Created(Arc<Timeline>),
|
Created(Arc<Timeline>),
|
||||||
Idempotent(Arc<Timeline>),
|
Idempotent(Arc<Timeline>),
|
||||||
/// IMPORTANT: This [`Arc<Timeline>`] object is not in [`Tenant::timelines`] when
|
/// IMPORTANT: This [`Arc<Timeline>`] object is not in [`TenantShard::timelines`] when
|
||||||
/// we return this result, nor will this concrete object ever be added there.
|
/// we return this result, nor will this concrete object ever be added there.
|
||||||
/// Cf method comment on [`Tenant::create_timeline_import_pgdata`].
|
/// Cf method comment on [`TenantShard::create_timeline_import_pgdata`].
|
||||||
ImportSpawned(Arc<Timeline>),
|
ImportSpawned(Arc<Timeline>),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1082,7 +1082,7 @@ pub(crate) enum LoadConfigError {
|
|||||||
NotFound(Utf8PathBuf),
|
NotFound(Utf8PathBuf),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Tenant {
|
impl TenantShard {
|
||||||
/// Yet another helper for timeline initialization.
|
/// Yet another helper for timeline initialization.
|
||||||
///
|
///
|
||||||
/// - Initializes the Timeline struct and inserts it into the tenant's hash map
|
/// - Initializes the Timeline struct and inserts it into the tenant's hash map
|
||||||
@@ -1303,7 +1303,7 @@ impl Tenant {
|
|||||||
init_order: Option<InitializationOrder>,
|
init_order: Option<InitializationOrder>,
|
||||||
mode: SpawnMode,
|
mode: SpawnMode,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> Result<Arc<Tenant>, GlobalShutDown> {
|
) -> Result<Arc<TenantShard>, GlobalShutDown> {
|
||||||
let wal_redo_manager =
|
let wal_redo_manager =
|
||||||
WalRedoManager::new(PostgresRedoManager::new(conf, tenant_shard_id))?;
|
WalRedoManager::new(PostgresRedoManager::new(conf, tenant_shard_id))?;
|
||||||
|
|
||||||
@@ -1317,7 +1317,7 @@ impl Tenant {
|
|||||||
let attach_mode = attached_conf.location.attach_mode;
|
let attach_mode = attached_conf.location.attach_mode;
|
||||||
let generation = attached_conf.location.generation;
|
let generation = attached_conf.location.generation;
|
||||||
|
|
||||||
let tenant = Arc::new(Tenant::new(
|
let tenant = Arc::new(TenantShard::new(
|
||||||
TenantState::Attaching,
|
TenantState::Attaching,
|
||||||
conf,
|
conf,
|
||||||
attached_conf,
|
attached_conf,
|
||||||
@@ -1334,7 +1334,7 @@ impl Tenant {
|
|||||||
let attach_gate_guard = tenant
|
let attach_gate_guard = tenant
|
||||||
.gate
|
.gate
|
||||||
.enter()
|
.enter()
|
||||||
.expect("We just created the Tenant: nothing else can have shut it down yet");
|
.expect("We just created the TenantShard: nothing else can have shut it down yet");
|
||||||
|
|
||||||
// Do all the hard work in the background
|
// Do all the hard work in the background
|
||||||
let tenant_clone = Arc::clone(&tenant);
|
let tenant_clone = Arc::clone(&tenant);
|
||||||
@@ -1362,7 +1362,7 @@ impl Tenant {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn make_broken_or_stopping(t: &Tenant, err: anyhow::Error) {
|
fn make_broken_or_stopping(t: &TenantShard, err: anyhow::Error) {
|
||||||
t.state.send_modify(|state| match state {
|
t.state.send_modify(|state| match state {
|
||||||
// TODO: the old code alluded to DeleteTenantFlow sometimes setting
|
// TODO: the old code alluded to DeleteTenantFlow sometimes setting
|
||||||
// TenantState::Stopping before we get here, but this may be outdated.
|
// TenantState::Stopping before we get here, but this may be outdated.
|
||||||
@@ -1627,7 +1627,7 @@ impl Tenant {
|
|||||||
/// No background tasks are started as part of this routine.
|
/// No background tasks are started as part of this routine.
|
||||||
///
|
///
|
||||||
async fn attach(
|
async fn attach(
|
||||||
self: &Arc<Tenant>,
|
self: &Arc<TenantShard>,
|
||||||
preload: Option<TenantPreload>,
|
preload: Option<TenantPreload>,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
@@ -1957,7 +1957,7 @@ impl Tenant {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn load_timelines_metadata(
|
async fn load_timelines_metadata(
|
||||||
self: &Arc<Tenant>,
|
self: &Arc<TenantShard>,
|
||||||
timeline_ids: HashSet<TimelineId>,
|
timeline_ids: HashSet<TimelineId>,
|
||||||
remote_storage: &GenericRemoteStorage,
|
remote_storage: &GenericRemoteStorage,
|
||||||
heatmap: Option<(HeatMapTenant, std::time::Instant)>,
|
heatmap: Option<(HeatMapTenant, std::time::Instant)>,
|
||||||
@@ -2028,7 +2028,7 @@ impl Tenant {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn load_timeline_metadata(
|
fn load_timeline_metadata(
|
||||||
self: &Arc<Tenant>,
|
self: &Arc<TenantShard>,
|
||||||
timeline_id: TimelineId,
|
timeline_id: TimelineId,
|
||||||
remote_storage: GenericRemoteStorage,
|
remote_storage: GenericRemoteStorage,
|
||||||
previous_heatmap: Option<PreviousHeatmap>,
|
previous_heatmap: Option<PreviousHeatmap>,
|
||||||
@@ -2429,14 +2429,14 @@ impl Tenant {
|
|||||||
/// This is used by tests & import-from-basebackup.
|
/// This is used by tests & import-from-basebackup.
|
||||||
///
|
///
|
||||||
/// The returned [`UninitializedTimeline`] contains no data nor metadata and it is in
|
/// The returned [`UninitializedTimeline`] contains no data nor metadata and it is in
|
||||||
/// a state that will fail [`Tenant::load_remote_timeline`] because `disk_consistent_lsn=Lsn(0)`.
|
/// a state that will fail [`TenantShard::load_remote_timeline`] because `disk_consistent_lsn=Lsn(0)`.
|
||||||
///
|
///
|
||||||
/// The caller is responsible for getting the timeline into a state that will be accepted
|
/// The caller is responsible for getting the timeline into a state that will be accepted
|
||||||
/// by [`Tenant::load_remote_timeline`] / [`Tenant::attach`].
|
/// by [`TenantShard::load_remote_timeline`] / [`TenantShard::attach`].
|
||||||
/// Then they may call [`UninitializedTimeline::finish_creation`] to add the timeline
|
/// Then they may call [`UninitializedTimeline::finish_creation`] to add the timeline
|
||||||
/// to the [`Tenant::timelines`].
|
/// to the [`TenantShard::timelines`].
|
||||||
///
|
///
|
||||||
/// Tests should use `Tenant::create_test_timeline` to set up the minimum required metadata keys.
|
/// Tests should use `TenantShard::create_test_timeline` to set up the minimum required metadata keys.
|
||||||
pub(crate) async fn create_empty_timeline(
|
pub(crate) async fn create_empty_timeline(
|
||||||
self: &Arc<Self>,
|
self: &Arc<Self>,
|
||||||
new_timeline_id: TimelineId,
|
new_timeline_id: TimelineId,
|
||||||
@@ -2584,7 +2584,7 @@ impl Tenant {
|
|||||||
/// the same timeline ID already exists, returns CreateTimelineError::AlreadyExists.
|
/// the same timeline ID already exists, returns CreateTimelineError::AlreadyExists.
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
pub(crate) async fn create_timeline(
|
pub(crate) async fn create_timeline(
|
||||||
self: &Arc<Tenant>,
|
self: &Arc<TenantShard>,
|
||||||
params: CreateTimelineParams,
|
params: CreateTimelineParams,
|
||||||
broker_client: storage_broker::BrokerClientChannel,
|
broker_client: storage_broker::BrokerClientChannel,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
@@ -2751,13 +2751,13 @@ impl Tenant {
|
|||||||
Ok(activated_timeline)
|
Ok(activated_timeline)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The returned [`Arc<Timeline>`] is NOT in the [`Tenant::timelines`] map until the import
|
/// The returned [`Arc<Timeline>`] is NOT in the [`TenantShard::timelines`] map until the import
|
||||||
/// completes in the background. A DIFFERENT [`Arc<Timeline>`] will be inserted into the
|
/// completes in the background. A DIFFERENT [`Arc<Timeline>`] will be inserted into the
|
||||||
/// [`Tenant::timelines`] map when the import completes.
|
/// [`TenantShard::timelines`] map when the import completes.
|
||||||
/// We only return an [`Arc<Timeline>`] here so the API handler can create a [`pageserver_api::models::TimelineInfo`]
|
/// We only return an [`Arc<Timeline>`] here so the API handler can create a [`pageserver_api::models::TimelineInfo`]
|
||||||
/// for the response.
|
/// for the response.
|
||||||
async fn create_timeline_import_pgdata(
|
async fn create_timeline_import_pgdata(
|
||||||
self: &Arc<Tenant>,
|
self: &Arc<Self>,
|
||||||
params: CreateTimelineParamsImportPgdata,
|
params: CreateTimelineParamsImportPgdata,
|
||||||
activate: ActivateTimelineArgs,
|
activate: ActivateTimelineArgs,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
@@ -2854,7 +2854,7 @@ impl Tenant {
|
|||||||
|
|
||||||
#[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%timeline.timeline_id))]
|
#[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%timeline.timeline_id))]
|
||||||
async fn create_timeline_import_pgdata_task(
|
async fn create_timeline_import_pgdata_task(
|
||||||
self: Arc<Tenant>,
|
self: Arc<TenantShard>,
|
||||||
timeline: Arc<Timeline>,
|
timeline: Arc<Timeline>,
|
||||||
index_part: import_pgdata::index_part_format::Root,
|
index_part: import_pgdata::index_part_format::Root,
|
||||||
activate: ActivateTimelineArgs,
|
activate: ActivateTimelineArgs,
|
||||||
@@ -2882,7 +2882,7 @@ impl Tenant {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn create_timeline_import_pgdata_task_impl(
|
async fn create_timeline_import_pgdata_task_impl(
|
||||||
self: Arc<Tenant>,
|
self: Arc<TenantShard>,
|
||||||
timeline: Arc<Timeline>,
|
timeline: Arc<Timeline>,
|
||||||
index_part: import_pgdata::index_part_format::Root,
|
index_part: import_pgdata::index_part_format::Root,
|
||||||
activate: ActivateTimelineArgs,
|
activate: ActivateTimelineArgs,
|
||||||
@@ -2899,10 +2899,10 @@ impl Tenant {
|
|||||||
// Reload timeline from remote.
|
// Reload timeline from remote.
|
||||||
// This proves that the remote state is attachable, and it reuses the code.
|
// This proves that the remote state is attachable, and it reuses the code.
|
||||||
//
|
//
|
||||||
// TODO: think about whether this is safe to do with concurrent Tenant::shutdown.
|
// TODO: think about whether this is safe to do with concurrent TenantShard::shutdown.
|
||||||
// timeline_create_guard hols the tenant gate open, so, shutdown cannot _complete_ until we exit.
|
// timeline_create_guard hols the tenant gate open, so, shutdown cannot _complete_ until we exit.
|
||||||
// But our activate() call might launch new background tasks after Tenant::shutdown
|
// But our activate() call might launch new background tasks after TenantShard::shutdown
|
||||||
// already went past shutting down the Tenant::timelines, which this timeline here is no part of.
|
// already went past shutting down the TenantShard::timelines, which this timeline here is no part of.
|
||||||
// I think the same problem exists with the bootstrap & branch mgmt API tasks (tenant shutting
|
// I think the same problem exists with the bootstrap & branch mgmt API tasks (tenant shutting
|
||||||
// down while bootstrapping/branching + activating), but, the race condition is much more likely
|
// down while bootstrapping/branching + activating), but, the race condition is much more likely
|
||||||
// to manifest because of the long runtime of this import task.
|
// to manifest because of the long runtime of this import task.
|
||||||
@@ -2917,7 +2917,7 @@ impl Tenant {
|
|||||||
// };
|
// };
|
||||||
let timeline_id = timeline.timeline_id;
|
let timeline_id = timeline.timeline_id;
|
||||||
|
|
||||||
// load from object storage like Tenant::attach does
|
// load from object storage like TenantShard::attach does
|
||||||
let resources = self.build_timeline_resources(timeline_id);
|
let resources = self.build_timeline_resources(timeline_id);
|
||||||
let index_part = resources
|
let index_part = resources
|
||||||
.remote_client
|
.remote_client
|
||||||
@@ -3938,7 +3938,7 @@ enum ActivateTimelineArgs {
|
|||||||
No,
|
No,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Tenant {
|
impl TenantShard {
|
||||||
pub fn tenant_specific_overrides(&self) -> pageserver_api::models::TenantConfig {
|
pub fn tenant_specific_overrides(&self) -> pageserver_api::models::TenantConfig {
|
||||||
self.tenant_conf.load().tenant_conf.clone()
|
self.tenant_conf.load().tenant_conf.clone()
|
||||||
}
|
}
|
||||||
@@ -4096,7 +4096,7 @@ impl Tenant {
|
|||||||
update: F,
|
update: F,
|
||||||
) -> anyhow::Result<pageserver_api::models::TenantConfig> {
|
) -> anyhow::Result<pageserver_api::models::TenantConfig> {
|
||||||
// Use read-copy-update in order to avoid overwriting the location config
|
// Use read-copy-update in order to avoid overwriting the location config
|
||||||
// state if this races with [`Tenant::set_new_location_config`]. Note that
|
// state if this races with [`TenantShard::set_new_location_config`]. Note that
|
||||||
// this race is not possible if both request types come from the storage
|
// this race is not possible if both request types come from the storage
|
||||||
// controller (as they should!) because an exclusive op lock is required
|
// controller (as they should!) because an exclusive op lock is required
|
||||||
// on the storage controller side.
|
// on the storage controller side.
|
||||||
@@ -4219,7 +4219,7 @@ impl Tenant {
|
|||||||
Ok((timeline, timeline_ctx))
|
Ok((timeline, timeline_ctx))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// [`Tenant::shutdown`] must be called before dropping the returned [`Tenant`] object
|
/// [`TenantShard::shutdown`] must be called before dropping the returned [`TenantShard`] object
|
||||||
/// to ensure proper cleanup of background tasks and metrics.
|
/// to ensure proper cleanup of background tasks and metrics.
|
||||||
//
|
//
|
||||||
// Allow too_many_arguments because a constructor's argument list naturally grows with the
|
// Allow too_many_arguments because a constructor's argument list naturally grows with the
|
||||||
@@ -4235,7 +4235,7 @@ impl Tenant {
|
|||||||
remote_storage: GenericRemoteStorage,
|
remote_storage: GenericRemoteStorage,
|
||||||
deletion_queue_client: DeletionQueueClient,
|
deletion_queue_client: DeletionQueueClient,
|
||||||
l0_flush_global_state: L0FlushGlobalState,
|
l0_flush_global_state: L0FlushGlobalState,
|
||||||
) -> Tenant {
|
) -> TenantShard {
|
||||||
debug_assert!(
|
debug_assert!(
|
||||||
!attached_conf.location.generation.is_none() || conf.control_plane_api.is_none()
|
!attached_conf.location.generation.is_none() || conf.control_plane_api.is_none()
|
||||||
);
|
);
|
||||||
@@ -4295,7 +4295,7 @@ impl Tenant {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
Tenant {
|
TenantShard {
|
||||||
tenant_shard_id,
|
tenant_shard_id,
|
||||||
shard_identity,
|
shard_identity,
|
||||||
generation: attached_conf.location.generation,
|
generation: attached_conf.location.generation,
|
||||||
@@ -4330,7 +4330,7 @@ impl Tenant {
|
|||||||
cancel: CancellationToken::default(),
|
cancel: CancellationToken::default(),
|
||||||
gate: Gate::default(),
|
gate: Gate::default(),
|
||||||
pagestream_throttle: Arc::new(throttle::Throttle::new(
|
pagestream_throttle: Arc::new(throttle::Throttle::new(
|
||||||
Tenant::get_pagestream_throttle_config(conf, &attached_conf.tenant_conf),
|
TenantShard::get_pagestream_throttle_config(conf, &attached_conf.tenant_conf),
|
||||||
)),
|
)),
|
||||||
pagestream_throttle_metrics: Arc::new(
|
pagestream_throttle_metrics: Arc::new(
|
||||||
crate::metrics::tenant_throttling::Pagestream::new(&tenant_shard_id),
|
crate::metrics::tenant_throttling::Pagestream::new(&tenant_shard_id),
|
||||||
@@ -4466,11 +4466,11 @@ impl Tenant {
|
|||||||
|
|
||||||
// Perform GC for each timeline.
|
// Perform GC for each timeline.
|
||||||
//
|
//
|
||||||
// Note that we don't hold the `Tenant::gc_cs` lock here because we don't want to delay the
|
// Note that we don't hold the `TenantShard::gc_cs` lock here because we don't want to delay the
|
||||||
// branch creation task, which requires the GC lock. A GC iteration can run concurrently
|
// branch creation task, which requires the GC lock. A GC iteration can run concurrently
|
||||||
// with branch creation.
|
// with branch creation.
|
||||||
//
|
//
|
||||||
// See comments in [`Tenant::branch_timeline`] for more information about why branch
|
// See comments in [`TenantShard::branch_timeline`] for more information about why branch
|
||||||
// creation task can run concurrently with timeline's GC iteration.
|
// creation task can run concurrently with timeline's GC iteration.
|
||||||
for timeline in gc_timelines {
|
for timeline in gc_timelines {
|
||||||
if cancel.is_cancelled() {
|
if cancel.is_cancelled() {
|
||||||
@@ -4500,7 +4500,7 @@ impl Tenant {
|
|||||||
|
|
||||||
/// Refreshes the Timeline::gc_info for all timelines, returning the
|
/// Refreshes the Timeline::gc_info for all timelines, returning the
|
||||||
/// vector of timelines which have [`Timeline::get_last_record_lsn`] past
|
/// vector of timelines which have [`Timeline::get_last_record_lsn`] past
|
||||||
/// [`Tenant::get_gc_horizon`].
|
/// [`TenantShard::get_gc_horizon`].
|
||||||
///
|
///
|
||||||
/// This is usually executed as part of periodic gc, but can now be triggered more often.
|
/// This is usually executed as part of periodic gc, but can now be triggered more often.
|
||||||
pub(crate) async fn refresh_gc_info(
|
pub(crate) async fn refresh_gc_info(
|
||||||
@@ -5499,7 +5499,7 @@ impl Tenant {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// The flushes we did above were just writes, but the Tenant might have had
|
// The flushes we did above were just writes, but the TenantShard might have had
|
||||||
// pending deletions as well from recent compaction/gc: we want to flush those
|
// pending deletions as well from recent compaction/gc: we want to flush those
|
||||||
// as well. This requires flushing the global delete queue. This is cheap
|
// as well. This requires flushing the global delete queue. This is cheap
|
||||||
// because it's typically a no-op.
|
// because it's typically a no-op.
|
||||||
@@ -5517,7 +5517,7 @@ impl Tenant {
|
|||||||
|
|
||||||
/// How much local storage would this tenant like to have? It can cope with
|
/// How much local storage would this tenant like to have? It can cope with
|
||||||
/// less than this (via eviction and on-demand downloads), but this function enables
|
/// less than this (via eviction and on-demand downloads), but this function enables
|
||||||
/// the Tenant to advertise how much storage it would prefer to have to provide fast I/O
|
/// the TenantShard to advertise how much storage it would prefer to have to provide fast I/O
|
||||||
/// by keeping important things on local disk.
|
/// by keeping important things on local disk.
|
||||||
///
|
///
|
||||||
/// This is a heuristic, not a guarantee: tenants that are long-idle will actually use less
|
/// This is a heuristic, not a guarantee: tenants that are long-idle will actually use less
|
||||||
@@ -5540,11 +5540,11 @@ impl Tenant {
|
|||||||
/// manifest in `Self::remote_tenant_manifest`.
|
/// manifest in `Self::remote_tenant_manifest`.
|
||||||
///
|
///
|
||||||
/// TODO: instead of requiring callers to remember to call `maybe_upload_tenant_manifest` after
|
/// TODO: instead of requiring callers to remember to call `maybe_upload_tenant_manifest` after
|
||||||
/// changing any `Tenant` state that's included in the manifest, consider making the manifest
|
/// changing any `TenantShard` state that's included in the manifest, consider making the manifest
|
||||||
/// the authoritative source of data with an API that automatically uploads on changes. Revisit
|
/// the authoritative source of data with an API that automatically uploads on changes. Revisit
|
||||||
/// this when the manifest is more widely used and we have a better idea of the data model.
|
/// this when the manifest is more widely used and we have a better idea of the data model.
|
||||||
pub(crate) async fn maybe_upload_tenant_manifest(&self) -> Result<(), TenantManifestError> {
|
pub(crate) async fn maybe_upload_tenant_manifest(&self) -> Result<(), TenantManifestError> {
|
||||||
// Multiple tasks may call this function concurrently after mutating the Tenant runtime
|
// Multiple tasks may call this function concurrently after mutating the TenantShard runtime
|
||||||
// state, affecting the manifest generated by `build_tenant_manifest`. We use an async mutex
|
// state, affecting the manifest generated by `build_tenant_manifest`. We use an async mutex
|
||||||
// to serialize these callers. `eq_ignoring_version` acts as a slightly inefficient but
|
// to serialize these callers. `eq_ignoring_version` acts as a slightly inefficient but
|
||||||
// simple coalescing mechanism.
|
// simple coalescing mechanism.
|
||||||
@@ -5812,7 +5812,7 @@ pub(crate) mod harness {
|
|||||||
info_span!("TenantHarness", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug())
|
info_span!("TenantHarness", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) async fn load(&self) -> (Arc<Tenant>, RequestContext) {
|
pub(crate) async fn load(&self) -> (Arc<TenantShard>, RequestContext) {
|
||||||
let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error)
|
let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error)
|
||||||
.with_scope_unit_test();
|
.with_scope_unit_test();
|
||||||
(
|
(
|
||||||
@@ -5827,10 +5827,10 @@ pub(crate) mod harness {
|
|||||||
pub(crate) async fn do_try_load(
|
pub(crate) async fn do_try_load(
|
||||||
&self,
|
&self,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> anyhow::Result<Arc<Tenant>> {
|
) -> anyhow::Result<Arc<TenantShard>> {
|
||||||
let walredo_mgr = Arc::new(WalRedoManager::from(TestRedoManager));
|
let walredo_mgr = Arc::new(WalRedoManager::from(TestRedoManager));
|
||||||
|
|
||||||
let tenant = Arc::new(Tenant::new(
|
let tenant = Arc::new(TenantShard::new(
|
||||||
TenantState::Attaching,
|
TenantState::Attaching,
|
||||||
self.conf,
|
self.conf,
|
||||||
AttachedTenantConf::try_from(LocationConf::attached_single(
|
AttachedTenantConf::try_from(LocationConf::attached_single(
|
||||||
@@ -6046,7 +6046,7 @@ mod tests {
|
|||||||
#[cfg(feature = "testing")]
|
#[cfg(feature = "testing")]
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
async fn randomize_timeline(
|
async fn randomize_timeline(
|
||||||
tenant: &Arc<Tenant>,
|
tenant: &Arc<TenantShard>,
|
||||||
new_timeline_id: TimelineId,
|
new_timeline_id: TimelineId,
|
||||||
pg_version: u32,
|
pg_version: u32,
|
||||||
spec: TestTimelineSpecification,
|
spec: TestTimelineSpecification,
|
||||||
@@ -6936,7 +6936,7 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn bulk_insert_compact_gc(
|
async fn bulk_insert_compact_gc(
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
timeline: &Arc<Timeline>,
|
timeline: &Arc<Timeline>,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
lsn: Lsn,
|
lsn: Lsn,
|
||||||
@@ -6948,7 +6948,7 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn bulk_insert_maybe_compact_gc(
|
async fn bulk_insert_maybe_compact_gc(
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
timeline: &Arc<Timeline>,
|
timeline: &Arc<Timeline>,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
mut lsn: Lsn,
|
mut lsn: Lsn,
|
||||||
@@ -7858,7 +7858,7 @@ mod tests {
|
|||||||
let (tline, _ctx) = tenant
|
let (tline, _ctx) = tenant
|
||||||
.create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)
|
.create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)
|
||||||
.await?;
|
.await?;
|
||||||
// Leave the timeline ID in [`Tenant::timelines_creating`] to exclude attempting to create it again
|
// Leave the timeline ID in [`TenantShard::timelines_creating`] to exclude attempting to create it again
|
||||||
let raw_tline = tline.raw_timeline().unwrap();
|
let raw_tline = tline.raw_timeline().unwrap();
|
||||||
raw_tline
|
raw_tline
|
||||||
.shutdown(super::timeline::ShutdownMode::Hard)
|
.shutdown(super::timeline::ShutdownMode::Hard)
|
||||||
|
|||||||
@@ -37,6 +37,63 @@ pub struct CompressionInfo {
|
|||||||
pub compressed_size: Option<usize>,
|
pub compressed_size: Option<usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A blob header, with header+data length and compression info.
|
||||||
|
///
|
||||||
|
/// TODO: use this more widely, and add an encode() method too.
|
||||||
|
/// TODO: document the header format.
|
||||||
|
#[derive(Clone, Copy, Default)]
|
||||||
|
pub struct Header {
|
||||||
|
pub header_len: usize,
|
||||||
|
pub data_len: usize,
|
||||||
|
pub compression_bits: u8,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Header {
|
||||||
|
/// Decodes a header from a byte slice.
|
||||||
|
pub fn decode(bytes: &[u8]) -> Result<Self, std::io::Error> {
|
||||||
|
let Some(&first_header_byte) = bytes.first() else {
|
||||||
|
return Err(std::io::Error::new(
|
||||||
|
std::io::ErrorKind::InvalidData,
|
||||||
|
"zero-length blob header",
|
||||||
|
));
|
||||||
|
};
|
||||||
|
|
||||||
|
// If the first bit is 0, this is just a 1-byte length prefix up to 128 bytes.
|
||||||
|
if first_header_byte < 0x80 {
|
||||||
|
return Ok(Self {
|
||||||
|
header_len: 1, // by definition
|
||||||
|
data_len: first_header_byte as usize,
|
||||||
|
compression_bits: BYTE_UNCOMPRESSED,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, this is a 4-byte header containing compression information and length.
|
||||||
|
const HEADER_LEN: usize = 4;
|
||||||
|
let mut header_buf: [u8; HEADER_LEN] = bytes[0..HEADER_LEN].try_into().map_err(|_| {
|
||||||
|
std::io::Error::new(
|
||||||
|
std::io::ErrorKind::InvalidData,
|
||||||
|
format!("blob header too short: {bytes:?}"),
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
// TODO: verify the compression bits and convert to an enum.
|
||||||
|
let compression_bits = header_buf[0] & LEN_COMPRESSION_BIT_MASK;
|
||||||
|
header_buf[0] &= !LEN_COMPRESSION_BIT_MASK;
|
||||||
|
let data_len = u32::from_be_bytes(header_buf) as usize;
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
header_len: HEADER_LEN,
|
||||||
|
data_len,
|
||||||
|
compression_bits,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the total header+data length.
|
||||||
|
pub fn total_len(&self) -> usize {
|
||||||
|
self.header_len + self.data_len
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl BlockCursor<'_> {
|
impl BlockCursor<'_> {
|
||||||
/// Read a blob into a new buffer.
|
/// Read a blob into a new buffer.
|
||||||
pub async fn read_blob(
|
pub async fn read_blob(
|
||||||
@@ -389,6 +446,34 @@ impl<const BUFFERED: bool> BlobWriter<BUFFERED> {
|
|||||||
};
|
};
|
||||||
(srcbuf, res.map(|_| (offset, compression_info)))
|
(srcbuf, res.map(|_| (offset, compression_info)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Writes a raw blob containing both header and data, returning its offset.
|
||||||
|
pub(crate) async fn write_blob_raw<Buf: IoBuf + Send>(
|
||||||
|
&mut self,
|
||||||
|
raw_with_header: FullSlice<Buf>,
|
||||||
|
ctx: &RequestContext,
|
||||||
|
) -> (FullSlice<Buf>, Result<u64, Error>) {
|
||||||
|
// Verify the header, to ensure we don't write invalid/corrupt data.
|
||||||
|
let header = match Header::decode(&raw_with_header) {
|
||||||
|
Ok(header) => header,
|
||||||
|
Err(err) => return (raw_with_header, Err(err)),
|
||||||
|
};
|
||||||
|
if raw_with_header.len() != header.total_len() {
|
||||||
|
let header_total_len = header.total_len();
|
||||||
|
let raw_len = raw_with_header.len();
|
||||||
|
return (
|
||||||
|
raw_with_header,
|
||||||
|
Err(std::io::Error::new(
|
||||||
|
std::io::ErrorKind::InvalidData,
|
||||||
|
format!("header length mismatch: {header_total_len} != {raw_len}"),
|
||||||
|
)),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
let offset = self.offset;
|
||||||
|
let (raw_with_header, result) = self.write_all(raw_with_header, ctx).await;
|
||||||
|
(raw_with_header, result.map(|_| offset))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl BlobWriter<true> {
|
impl BlobWriter<true> {
|
||||||
|
|||||||
@@ -714,7 +714,7 @@ impl LayerMap {
|
|||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn iter_historic_layers(&self) -> impl '_ + Iterator<Item = Arc<PersistentLayerDesc>> {
|
pub fn iter_historic_layers(&self) -> impl ExactSizeIterator<Item = Arc<PersistentLayerDesc>> {
|
||||||
self.historic.iter()
|
self.historic.iter()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -504,7 +504,7 @@ impl<Value: Clone> BufferedHistoricLayerCoverage<Value> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Iterate all the layers
|
/// Iterate all the layers
|
||||||
pub fn iter(&self) -> impl '_ + Iterator<Item = Value> {
|
pub fn iter(&self) -> impl ExactSizeIterator<Item = Value> {
|
||||||
// NOTE we can actually perform this without rebuilding,
|
// NOTE we can actually perform this without rebuilding,
|
||||||
// but it's not necessary for now.
|
// but it's not necessary for now.
|
||||||
if !self.buffer.is_empty() {
|
if !self.buffer.is_empty() {
|
||||||
|
|||||||
@@ -564,8 +564,9 @@ mod tests {
|
|||||||
Lsn(0),
|
Lsn(0),
|
||||||
Lsn(0),
|
Lsn(0),
|
||||||
Lsn(0),
|
Lsn(0),
|
||||||
// Any version will do here, so use the default
|
// Updating this version to 17 will cause the test to fail at the
|
||||||
crate::DEFAULT_PG_VERSION,
|
// next assert_eq!().
|
||||||
|
16,
|
||||||
);
|
);
|
||||||
let expected_bytes = vec![
|
let expected_bytes = vec![
|
||||||
/* TimelineMetadataHeader */
|
/* TimelineMetadataHeader */
|
||||||
|
|||||||
@@ -52,7 +52,9 @@ use crate::tenant::config::{
|
|||||||
use crate::tenant::span::debug_assert_current_span_has_tenant_id;
|
use crate::tenant::span::debug_assert_current_span_has_tenant_id;
|
||||||
use crate::tenant::storage_layer::inmemory_layer;
|
use crate::tenant::storage_layer::inmemory_layer;
|
||||||
use crate::tenant::timeline::ShutdownMode;
|
use crate::tenant::timeline::ShutdownMode;
|
||||||
use crate::tenant::{AttachedTenantConf, GcError, LoadConfigError, SpawnMode, Tenant, TenantState};
|
use crate::tenant::{
|
||||||
|
AttachedTenantConf, GcError, LoadConfigError, SpawnMode, TenantShard, TenantState,
|
||||||
|
};
|
||||||
use crate::virtual_file::MaybeFatalIo;
|
use crate::virtual_file::MaybeFatalIo;
|
||||||
use crate::{InitializationOrder, TEMP_FILE_SUFFIX};
|
use crate::{InitializationOrder, TEMP_FILE_SUFFIX};
|
||||||
|
|
||||||
@@ -67,7 +69,7 @@ use crate::{InitializationOrder, TEMP_FILE_SUFFIX};
|
|||||||
/// having a properly acquired generation (Secondary doesn't need a generation)
|
/// having a properly acquired generation (Secondary doesn't need a generation)
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub(crate) enum TenantSlot {
|
pub(crate) enum TenantSlot {
|
||||||
Attached(Arc<Tenant>),
|
Attached(Arc<TenantShard>),
|
||||||
Secondary(Arc<SecondaryTenant>),
|
Secondary(Arc<SecondaryTenant>),
|
||||||
/// In this state, other administrative operations acting on the TenantId should
|
/// In this state, other administrative operations acting on the TenantId should
|
||||||
/// block, or return a retry indicator equivalent to HTTP 503.
|
/// block, or return a retry indicator equivalent to HTTP 503.
|
||||||
@@ -86,7 +88,7 @@ impl std::fmt::Debug for TenantSlot {
|
|||||||
|
|
||||||
impl TenantSlot {
|
impl TenantSlot {
|
||||||
/// Return the `Tenant` in this slot if attached, else None
|
/// Return the `TenantShard` in this slot if attached, else None
|
||||||
fn get_attached(&self) -> Option<&Arc<Tenant>> {
|
fn get_attached(&self) -> Option<&Arc<TenantShard>> {
|
||||||
match self {
|
match self {
|
||||||
Self::Attached(t) => Some(t),
|
Self::Attached(t) => Some(t),
|
||||||
Self::Secondary(_) => None,
|
Self::Secondary(_) => None,
|
||||||
@@ -164,7 +166,7 @@ impl TenantStartupMode {
|
|||||||
/// Result type for looking up a TenantId to a specific shard
|
/// Result type for looking up a TenantId to a specific shard
|
||||||
pub(crate) enum ShardResolveResult {
|
pub(crate) enum ShardResolveResult {
|
||||||
NotFound,
|
NotFound,
|
||||||
Found(Arc<Tenant>),
|
Found(Arc<TenantShard>),
|
||||||
// Wait for this barrrier, then query again
|
// Wait for this barrier, then query again
|
||||||
InProgress(utils::completion::Barrier),
|
InProgress(utils::completion::Barrier),
|
||||||
}
|
}
|
||||||
@@ -173,7 +175,7 @@ impl TenantsMap {
|
|||||||
/// Convenience function for typical usage, where we want to get a `Tenant` object, for
|
/// Convenience function for typical usage, where we want to get a `TenantShard` object, for
|
||||||
/// working with attached tenants. If the TenantId is in the map but in Secondary state,
|
/// working with attached tenants. If the TenantId is in the map but in Secondary state,
|
||||||
/// None is returned.
|
/// None is returned.
|
||||||
pub(crate) fn get(&self, tenant_shard_id: &TenantShardId) -> Option<&Arc<Tenant>> {
|
pub(crate) fn get(&self, tenant_shard_id: &TenantShardId) -> Option<&Arc<TenantShard>> {
|
||||||
match self {
|
match self {
|
||||||
TenantsMap::Initializing => None,
|
TenantsMap::Initializing => None,
|
||||||
TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => {
|
TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => {
|
||||||
@@ -410,7 +412,7 @@ fn load_tenant_config(
|
|||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
Some(Tenant::load_tenant_config(conf, &tenant_shard_id))
|
Some(TenantShard::load_tenant_config(conf, &tenant_shard_id))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Initial stage of load: walk the local tenants directory, clean up any temp files,
|
/// Initial stage of load: walk the local tenants directory, clean up any temp files,
|
||||||
@@ -606,7 +608,8 @@ pub async fn init_tenant_mgr(
|
|||||||
// Presence of a generation number implies attachment: attach the tenant
|
// Presence of a generation number implies attachment: attach the tenant
|
||||||
// if it wasn't already, and apply the generation number.
|
// if it wasn't already, and apply the generation number.
|
||||||
config_write_futs.push(async move {
|
config_write_futs.push(async move {
|
||||||
let r = Tenant::persist_tenant_config(conf, &tenant_shard_id, &location_conf).await;
|
let r =
|
||||||
|
TenantShard::persist_tenant_config(conf, &tenant_shard_id, &location_conf).await;
|
||||||
(tenant_shard_id, location_conf, r)
|
(tenant_shard_id, location_conf, r)
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -694,7 +697,7 @@ fn tenant_spawn(
|
|||||||
init_order: Option<InitializationOrder>,
|
init_order: Option<InitializationOrder>,
|
||||||
mode: SpawnMode,
|
mode: SpawnMode,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> Result<Arc<Tenant>, GlobalShutDown> {
|
) -> Result<Arc<TenantShard>, GlobalShutDown> {
|
||||||
// All these conditions should have been satisfied by our caller: the tenant dir exists, is a well formed
|
// All these conditions should have been satisfied by our caller: the tenant dir exists, is a well formed
|
||||||
// path, and contains a configuration file. Assertions that do synchronous I/O are limited to debug mode
|
// path, and contains a configuration file. Assertions that do synchronous I/O are limited to debug mode
|
||||||
// to avoid impacting prod runtime performance.
|
// to avoid impacting prod runtime performance.
|
||||||
@@ -706,7 +709,7 @@ fn tenant_spawn(
|
|||||||
.unwrap()
|
.unwrap()
|
||||||
);
|
);
|
||||||
|
|
||||||
Tenant::spawn(
|
TenantShard::spawn(
|
||||||
conf,
|
conf,
|
||||||
tenant_shard_id,
|
tenant_shard_id,
|
||||||
resources,
|
resources,
|
||||||
@@ -883,12 +886,12 @@ impl TenantManager {
|
|||||||
/// Gets the attached tenant from the in-memory data, erroring if it's absent, in secondary mode, or currently
|
/// Gets the attached tenant from the in-memory data, erroring if it's absent, in secondary mode, or currently
|
||||||
/// undergoing a state change (i.e. slot is InProgress).
|
/// undergoing a state change (i.e. slot is InProgress).
|
||||||
///
|
///
|
||||||
/// The return Tenant is not guaranteed to be active: check its status after obtaing it, or
|
/// The returned TenantShard is not guaranteed to be active: check its status after obtaining it, or
|
||||||
/// use [`Tenant::wait_to_become_active`] before using it if you will do I/O on it.
|
/// use [`TenantShard::wait_to_become_active`] before using it if you will do I/O on it.
|
||||||
pub(crate) fn get_attached_tenant_shard(
|
pub(crate) fn get_attached_tenant_shard(
|
||||||
&self,
|
&self,
|
||||||
tenant_shard_id: TenantShardId,
|
tenant_shard_id: TenantShardId,
|
||||||
) -> Result<Arc<Tenant>, GetTenantError> {
|
) -> Result<Arc<TenantShard>, GetTenantError> {
|
||||||
let locked = self.tenants.read().unwrap();
|
let locked = self.tenants.read().unwrap();
|
||||||
|
|
||||||
let peek_slot = tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Read)?;
|
let peek_slot = tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Read)?;
|
||||||
@@ -937,12 +940,12 @@ impl TenantManager {
|
|||||||
flush: Option<Duration>,
|
flush: Option<Duration>,
|
||||||
mut spawn_mode: SpawnMode,
|
mut spawn_mode: SpawnMode,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> Result<Option<Arc<Tenant>>, UpsertLocationError> {
|
) -> Result<Option<Arc<TenantShard>>, UpsertLocationError> {
|
||||||
debug_assert_current_span_has_tenant_id();
|
debug_assert_current_span_has_tenant_id();
|
||||||
info!("configuring tenant location to state {new_location_config:?}");
|
info!("configuring tenant location to state {new_location_config:?}");
|
||||||
|
|
||||||
enum FastPathModified {
|
enum FastPathModified {
|
||||||
Attached(Arc<Tenant>),
|
Attached(Arc<TenantShard>),
|
||||||
Secondary(Arc<SecondaryTenant>),
|
Secondary(Arc<SecondaryTenant>),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -999,9 +1002,13 @@ impl TenantManager {
|
|||||||
// phase of writing config and/or waiting for flush, before returning.
|
// phase of writing config and/or waiting for flush, before returning.
|
||||||
match fast_path_taken {
|
match fast_path_taken {
|
||||||
Some(FastPathModified::Attached(tenant)) => {
|
Some(FastPathModified::Attached(tenant)) => {
|
||||||
Tenant::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config)
|
TenantShard::persist_tenant_config(
|
||||||
.await
|
self.conf,
|
||||||
.fatal_err("write tenant shard config");
|
&tenant_shard_id,
|
||||||
|
&new_location_config,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.fatal_err("write tenant shard config");
|
||||||
|
|
||||||
// Transition to AttachedStale means we may well hold a valid generation
|
// Transition to AttachedStale means we may well hold a valid generation
|
||||||
// still, and have been requested to go stale as part of a migration. If
|
// still, and have been requested to go stale as part of a migration. If
|
||||||
@@ -1030,9 +1037,13 @@ impl TenantManager {
|
|||||||
return Ok(Some(tenant));
|
return Ok(Some(tenant));
|
||||||
}
|
}
|
||||||
Some(FastPathModified::Secondary(_secondary_tenant)) => {
|
Some(FastPathModified::Secondary(_secondary_tenant)) => {
|
||||||
Tenant::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config)
|
TenantShard::persist_tenant_config(
|
||||||
.await
|
self.conf,
|
||||||
.fatal_err("write tenant shard config");
|
&tenant_shard_id,
|
||||||
|
&new_location_config,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.fatal_err("write tenant shard config");
|
||||||
|
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
}
|
}
|
||||||
@@ -1122,7 +1133,7 @@ impl TenantManager {
|
|||||||
// Before activating either secondary or attached mode, persist the
|
// Before activating either secondary or attached mode, persist the
|
||||||
// configuration, so that on restart we will re-attach (or re-start
|
// configuration, so that on restart we will re-attach (or re-start
|
||||||
// secondary) on the tenant.
|
// secondary) on the tenant.
|
||||||
Tenant::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config)
|
TenantShard::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config)
|
||||||
.await
|
.await
|
||||||
.fatal_err("write tenant shard config");
|
.fatal_err("write tenant shard config");
|
||||||
|
|
||||||
@@ -1262,7 +1273,7 @@ impl TenantManager {
|
|||||||
|
|
||||||
let tenant_path = self.conf.tenant_path(&tenant_shard_id);
|
let tenant_path = self.conf.tenant_path(&tenant_shard_id);
|
||||||
let timelines_path = self.conf.timelines_path(&tenant_shard_id);
|
let timelines_path = self.conf.timelines_path(&tenant_shard_id);
|
||||||
let config = Tenant::load_tenant_config(self.conf, &tenant_shard_id)?;
|
let config = TenantShard::load_tenant_config(self.conf, &tenant_shard_id)?;
|
||||||
|
|
||||||
if drop_cache {
|
if drop_cache {
|
||||||
tracing::info!("Dropping local file cache");
|
tracing::info!("Dropping local file cache");
|
||||||
@@ -1297,7 +1308,7 @@ impl TenantManager {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn get_attached_active_tenant_shards(&self) -> Vec<Arc<Tenant>> {
|
pub(crate) fn get_attached_active_tenant_shards(&self) -> Vec<Arc<TenantShard>> {
|
||||||
let locked = self.tenants.read().unwrap();
|
let locked = self.tenants.read().unwrap();
|
||||||
match &*locked {
|
match &*locked {
|
||||||
TenantsMap::Initializing => Vec::new(),
|
TenantsMap::Initializing => Vec::new(),
|
||||||
@@ -1446,7 +1457,7 @@ impl TenantManager {
|
|||||||
#[instrument(skip_all, fields(tenant_id=%tenant.get_tenant_shard_id().tenant_id, shard_id=%tenant.get_tenant_shard_id().shard_slug(), new_shard_count=%new_shard_count.literal()))]
|
#[instrument(skip_all, fields(tenant_id=%tenant.get_tenant_shard_id().tenant_id, shard_id=%tenant.get_tenant_shard_id().shard_slug(), new_shard_count=%new_shard_count.literal()))]
|
||||||
pub(crate) async fn shard_split(
|
pub(crate) async fn shard_split(
|
||||||
&self,
|
&self,
|
||||||
tenant: Arc<Tenant>,
|
tenant: Arc<TenantShard>,
|
||||||
new_shard_count: ShardCount,
|
new_shard_count: ShardCount,
|
||||||
new_stripe_size: Option<ShardStripeSize>,
|
new_stripe_size: Option<ShardStripeSize>,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
@@ -1476,7 +1487,7 @@ impl TenantManager {
|
|||||||
|
|
||||||
pub(crate) async fn do_shard_split(
|
pub(crate) async fn do_shard_split(
|
||||||
&self,
|
&self,
|
||||||
tenant: Arc<Tenant>,
|
tenant: Arc<TenantShard>,
|
||||||
new_shard_count: ShardCount,
|
new_shard_count: ShardCount,
|
||||||
new_stripe_size: Option<ShardStripeSize>,
|
new_stripe_size: Option<ShardStripeSize>,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
@@ -1703,7 +1714,7 @@ impl TenantManager {
|
|||||||
/// For each resident layer in the parent shard, we will hard link it into all of the child shards.
|
/// For each resident layer in the parent shard, we will hard link it into all of the child shards.
|
||||||
async fn shard_split_hardlink(
|
async fn shard_split_hardlink(
|
||||||
&self,
|
&self,
|
||||||
parent_shard: &Tenant,
|
parent_shard: &TenantShard,
|
||||||
child_shards: Vec<TenantShardId>,
|
child_shards: Vec<TenantShardId>,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
debug_assert_current_span_has_tenant_id();
|
debug_assert_current_span_has_tenant_id();
|
||||||
@@ -1988,7 +1999,7 @@ impl TenantManager {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let tenant_path = self.conf.tenant_path(&tenant_shard_id);
|
let tenant_path = self.conf.tenant_path(&tenant_shard_id);
|
||||||
let config = Tenant::load_tenant_config(self.conf, &tenant_shard_id)
|
let config = TenantShard::load_tenant_config(self.conf, &tenant_shard_id)
|
||||||
.map_err(|e| Error::DetachReparent(e.into()))?;
|
.map_err(|e| Error::DetachReparent(e.into()))?;
|
||||||
|
|
||||||
let shard_identity = config.shard;
|
let shard_identity = config.shard;
|
||||||
|
|||||||
@@ -133,7 +133,7 @@
|
|||||||
//! - Initiate upload queue with that [`IndexPart`].
|
//! - Initiate upload queue with that [`IndexPart`].
|
||||||
//! - Reschedule all lost operations by comparing the local filesystem state
|
//! - Reschedule all lost operations by comparing the local filesystem state
|
||||||
//! and remote state as per [`IndexPart`]. This is done in
|
//! and remote state as per [`IndexPart`]. This is done in
|
||||||
//! [`Tenant::timeline_init_and_sync`].
|
//! [`TenantShard::timeline_init_and_sync`].
|
||||||
//!
|
//!
|
||||||
//! Note that if we crash during file deletion between the index update
|
//! Note that if we crash during file deletion between the index update
|
||||||
//! that removes the file from the list of files, and deleting the remote file,
|
//! that removes the file from the list of files, and deleting the remote file,
|
||||||
@@ -171,7 +171,7 @@
|
|||||||
//! If no remote storage configuration is provided, the [`RemoteTimelineClient`] is
|
//! If no remote storage configuration is provided, the [`RemoteTimelineClient`] is
|
||||||
//! not created and the uploads are skipped.
|
//! not created and the uploads are skipped.
|
||||||
//!
|
//!
|
||||||
//! [`Tenant::timeline_init_and_sync`]: super::Tenant::timeline_init_and_sync
|
//! [`TenantShard::timeline_init_and_sync`]: super::TenantShard::timeline_init_and_sync
|
||||||
//! [`Timeline::load_layer_map`]: super::Timeline::load_layer_map
|
//! [`Timeline::load_layer_map`]: super::Timeline::load_layer_map
|
||||||
|
|
||||||
pub(crate) mod download;
|
pub(crate) mod download;
|
||||||
@@ -2743,7 +2743,7 @@ mod tests {
|
|||||||
use crate::tenant::config::AttachmentMode;
|
use crate::tenant::config::AttachmentMode;
|
||||||
use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
|
use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
|
||||||
use crate::tenant::storage_layer::layer::local_layer_path;
|
use crate::tenant::storage_layer::layer::local_layer_path;
|
||||||
use crate::tenant::{Tenant, Timeline};
|
use crate::tenant::{TenantShard, Timeline};
|
||||||
|
|
||||||
pub(super) fn dummy_contents(name: &str) -> Vec<u8> {
|
pub(super) fn dummy_contents(name: &str) -> Vec<u8> {
|
||||||
format!("contents for {name}").into()
|
format!("contents for {name}").into()
|
||||||
@@ -2796,7 +2796,7 @@ mod tests {
|
|||||||
|
|
||||||
struct TestSetup {
|
struct TestSetup {
|
||||||
harness: TenantHarness,
|
harness: TenantHarness,
|
||||||
tenant: Arc<Tenant>,
|
tenant: Arc<TenantShard>,
|
||||||
timeline: Arc<Timeline>,
|
timeline: Arc<Timeline>,
|
||||||
tenant_ctx: RequestContext,
|
tenant_ctx: RequestContext,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -452,7 +452,7 @@ async fn do_download_index_part(
|
|||||||
/// generation (normal case when migrating/restarting). Only if both of these return 404 do we fall back
|
/// generation (normal case when migrating/restarting). Only if both of these return 404 do we fall back
|
||||||
/// to listing objects.
|
/// to listing objects.
|
||||||
///
|
///
|
||||||
/// * `my_generation`: the value of `[crate::tenant::Tenant::generation]`
|
/// * `my_generation`: the value of `[crate::tenant::TenantShard::generation]`
|
||||||
/// * `what`: for logging, what object are we downloading
|
/// * `what`: for logging, what object are we downloading
|
||||||
/// * `prefix`: when listing objects, use this prefix (i.e. the part of the object path before the generation)
|
/// * `prefix`: when listing objects, use this prefix (i.e. the part of the object path before the generation)
|
||||||
/// * `do_download`: a GET of the object in a particular generation, which should **retry indefinitely** unless
|
/// * `do_download`: a GET of the object in a particular generation, which should **retry indefinitely** unless
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ use super::scheduler::{
|
|||||||
use super::{CommandRequest, SecondaryTenantError, UploadCommand};
|
use super::{CommandRequest, SecondaryTenantError, UploadCommand};
|
||||||
use crate::TEMP_FILE_SUFFIX;
|
use crate::TEMP_FILE_SUFFIX;
|
||||||
use crate::metrics::SECONDARY_MODE;
|
use crate::metrics::SECONDARY_MODE;
|
||||||
use crate::tenant::Tenant;
|
use crate::tenant::TenantShard;
|
||||||
use crate::tenant::config::AttachmentMode;
|
use crate::tenant::config::AttachmentMode;
|
||||||
use crate::tenant::mgr::{GetTenantError, TenantManager};
|
use crate::tenant::mgr::{GetTenantError, TenantManager};
|
||||||
use crate::tenant::remote_timeline_client::remote_heatmap_path;
|
use crate::tenant::remote_timeline_client::remote_heatmap_path;
|
||||||
@@ -74,7 +74,7 @@ impl RunningJob for WriteInProgress {
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct UploadPending {
|
struct UploadPending {
|
||||||
tenant: Arc<Tenant>,
|
tenant: Arc<TenantShard>,
|
||||||
last_upload: Option<LastUploadState>,
|
last_upload: Option<LastUploadState>,
|
||||||
target_time: Option<Instant>,
|
target_time: Option<Instant>,
|
||||||
period: Option<Duration>,
|
period: Option<Duration>,
|
||||||
@@ -106,7 +106,7 @@ impl scheduler::Completion for WriteComplete {
|
|||||||
struct UploaderTenantState {
|
struct UploaderTenantState {
|
||||||
// This Weak only exists to enable culling idle instances of this type
|
// This Weak only exists to enable culling idle instances of this type
|
||||||
// when the Tenant has been deallocated.
|
// when the TenantShard has been deallocated.
|
||||||
tenant: Weak<Tenant>,
|
tenant: Weak<TenantShard>,
|
||||||
|
|
||||||
/// Digest of the serialized heatmap that we last successfully uploaded
|
/// Digest of the serialized heatmap that we last successfully uploaded
|
||||||
last_upload_state: Option<LastUploadState>,
|
last_upload_state: Option<LastUploadState>,
|
||||||
@@ -357,7 +357,7 @@ struct LastUploadState {
|
|||||||
/// of the object we would have uploaded.
|
/// of the object we would have uploaded.
|
||||||
async fn upload_tenant_heatmap(
|
async fn upload_tenant_heatmap(
|
||||||
remote_storage: GenericRemoteStorage,
|
remote_storage: GenericRemoteStorage,
|
||||||
tenant: &Arc<Tenant>,
|
tenant: &Arc<TenantShard>,
|
||||||
last_upload: Option<LastUploadState>,
|
last_upload: Option<LastUploadState>,
|
||||||
) -> Result<UploadHeatmapOutcome, UploadHeatmapError> {
|
) -> Result<UploadHeatmapOutcome, UploadHeatmapError> {
|
||||||
debug_assert_current_span_has_tenant_id();
|
debug_assert_current_span_has_tenant_id();
|
||||||
|
|||||||
@@ -360,7 +360,7 @@ where
|
|||||||
|
|
||||||
/// Periodic execution phase: inspect all attached tenants and schedule any work they require.
|
/// Periodic execution phase: inspect all attached tenants and schedule any work they require.
|
||||||
///
|
///
|
||||||
/// The type in `tenants` should be a tenant-like structure, e.g. [`crate::tenant::Tenant`] or [`crate::tenant::secondary::SecondaryTenant`]
|
/// The type in `tenants` should be a tenant-like structure, e.g. [`crate::tenant::TenantShard`] or [`crate::tenant::secondary::SecondaryTenant`]
|
||||||
///
|
///
|
||||||
/// This function resets the pending list: it is assumed that the caller may change their mind about
|
/// This function resets the pending list: it is assumed that the caller may change their mind about
|
||||||
/// which tenants need work between calls to schedule_iteration.
|
/// which tenants need work between calls to schedule_iteration.
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ use tracing::*;
|
|||||||
use utils::id::TimelineId;
|
use utils::id::TimelineId;
|
||||||
use utils::lsn::Lsn;
|
use utils::lsn::Lsn;
|
||||||
|
|
||||||
use super::{GcError, LogicalSizeCalculationCause, Tenant};
|
use super::{GcError, LogicalSizeCalculationCause, TenantShard};
|
||||||
use crate::context::RequestContext;
|
use crate::context::RequestContext;
|
||||||
use crate::pgdatadir_mapping::CalculateLogicalSizeError;
|
use crate::pgdatadir_mapping::CalculateLogicalSizeError;
|
||||||
use crate::tenant::{MaybeOffloaded, Timeline};
|
use crate::tenant::{MaybeOffloaded, Timeline};
|
||||||
@@ -156,7 +156,7 @@ pub struct TimelineInputs {
|
|||||||
/// initdb_lsn branchpoints* next_pitr_cutoff latest
|
/// initdb_lsn branchpoints* next_pitr_cutoff latest
|
||||||
/// ```
|
/// ```
|
||||||
pub(super) async fn gather_inputs(
|
pub(super) async fn gather_inputs(
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
limit: &Arc<Semaphore>,
|
limit: &Arc<Semaphore>,
|
||||||
max_retention_period: Option<u64>,
|
max_retention_period: Option<u64>,
|
||||||
logical_size_cache: &mut HashMap<(TimelineId, Lsn), u64>,
|
logical_size_cache: &mut HashMap<(TimelineId, Lsn), u64>,
|
||||||
|
|||||||
@@ -1620,7 +1620,7 @@ pub(crate) mod test {
|
|||||||
use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
|
use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
|
||||||
use crate::tenant::storage_layer::{Layer, ResidentLayer};
|
use crate::tenant::storage_layer::{Layer, ResidentLayer};
|
||||||
use crate::tenant::vectored_blob_io::StreamingVectoredReadPlanner;
|
use crate::tenant::vectored_blob_io::StreamingVectoredReadPlanner;
|
||||||
use crate::tenant::{Tenant, Timeline};
|
use crate::tenant::{TenantShard, Timeline};
|
||||||
|
|
||||||
/// Construct an index for a fictional delta layer and and then
|
/// Construct an index for a fictional delta layer and then
|
||||||
/// traverse in order to plan vectored reads for a query. Finally,
|
/// traverse in order to plan vectored reads for a query. Finally,
|
||||||
@@ -2209,7 +2209,7 @@ pub(crate) mod test {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) async fn produce_delta_layer(
|
pub(crate) async fn produce_delta_layer(
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
tline: &Arc<Timeline>,
|
tline: &Arc<Timeline>,
|
||||||
mut deltas: Vec<(Key, Lsn, Value)>,
|
mut deltas: Vec<(Key, Lsn, Value)>,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
|
|||||||
@@ -559,11 +559,12 @@ impl ImageLayerInner {
|
|||||||
let view = BufView::new_slice(&blobs_buf.buf);
|
let view = BufView::new_slice(&blobs_buf.buf);
|
||||||
|
|
||||||
for meta in blobs_buf.blobs.iter() {
|
for meta in blobs_buf.blobs.iter() {
|
||||||
let img_buf = meta.read(&view).await?;
|
// Just read the raw header+data and pass it through to the target layer, without
|
||||||
|
// decoding and recompressing it.
|
||||||
|
let raw = meta.raw_with_header(&view);
|
||||||
key_count += 1;
|
key_count += 1;
|
||||||
writer
|
writer
|
||||||
.put_image(meta.meta.key, img_buf.into_bytes(), ctx)
|
.put_image_raw(meta.meta.key, raw.into_bytes(), ctx)
|
||||||
.await
|
.await
|
||||||
.context(format!("Storing key {}", meta.meta.key))?;
|
.context(format!("Storing key {}", meta.meta.key))?;
|
||||||
}
|
}
|
||||||
@@ -853,6 +854,41 @@ impl ImageLayerWriterInner {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///
|
||||||
|
/// Write the next image to the file, as a raw blob header and data.
|
||||||
|
///
|
||||||
|
/// The page versions must be appended in blknum order.
|
||||||
|
///
|
||||||
|
async fn put_image_raw(
|
||||||
|
&mut self,
|
||||||
|
key: Key,
|
||||||
|
raw_with_header: Bytes,
|
||||||
|
ctx: &RequestContext,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
ensure!(self.key_range.contains(&key));
|
||||||
|
|
||||||
|
// NB: we don't update the (un)compressed metrics, since we can't determine them without
|
||||||
|
// decompressing the image. This seems okay.
|
||||||
|
self.num_keys += 1;
|
||||||
|
|
||||||
|
let (_, res) = self
|
||||||
|
.blob_writer
|
||||||
|
.write_blob_raw(raw_with_header.slice_len(), ctx)
|
||||||
|
.await;
|
||||||
|
let offset = res?;
|
||||||
|
|
||||||
|
let mut keybuf: [u8; KEY_SIZE] = [0u8; KEY_SIZE];
|
||||||
|
key.write_to_byte_slice(&mut keybuf);
|
||||||
|
self.tree.append(&keybuf, offset)?;
|
||||||
|
|
||||||
|
#[cfg(feature = "testing")]
|
||||||
|
{
|
||||||
|
self.last_written_key = key;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
///
|
///
|
||||||
/// Finish writing the image layer.
|
/// Finish writing the image layer.
|
||||||
///
|
///
|
||||||
@@ -888,7 +924,13 @@ impl ImageLayerWriterInner {
|
|||||||
crate::metrics::COMPRESSION_IMAGE_INPUT_BYTES_CONSIDERED
|
crate::metrics::COMPRESSION_IMAGE_INPUT_BYTES_CONSIDERED
|
||||||
.inc_by(self.uncompressed_bytes_eligible);
|
.inc_by(self.uncompressed_bytes_eligible);
|
||||||
crate::metrics::COMPRESSION_IMAGE_INPUT_BYTES_CHOSEN.inc_by(self.uncompressed_bytes_chosen);
|
crate::metrics::COMPRESSION_IMAGE_INPUT_BYTES_CHOSEN.inc_by(self.uncompressed_bytes_chosen);
|
||||||
crate::metrics::COMPRESSION_IMAGE_OUTPUT_BYTES.inc_by(compressed_size);
|
|
||||||
|
// NB: filter() may pass through raw pages from a different layer, without looking at
|
||||||
|
// whether these are compressed or not. We don't track metrics for these, so avoid
|
||||||
|
// increasing `COMPRESSION_IMAGE_OUTPUT_BYTES` in this case too.
|
||||||
|
if self.uncompressed_bytes > 0 {
|
||||||
|
crate::metrics::COMPRESSION_IMAGE_OUTPUT_BYTES.inc_by(compressed_size);
|
||||||
|
};
|
||||||
|
|
||||||
let mut file = self.blob_writer.into_inner();
|
let mut file = self.blob_writer.into_inner();
|
||||||
|
|
||||||
@@ -1034,6 +1076,25 @@ impl ImageLayerWriter {
|
|||||||
self.inner.as_mut().unwrap().put_image(key, img, ctx).await
|
self.inner.as_mut().unwrap().put_image(key, img, ctx).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///
|
||||||
|
/// Write the next value to the file, as a raw header and data. This allows passing through a
|
||||||
|
/// raw, potentially compressed image from a different layer file without recompressing it.
|
||||||
|
///
|
||||||
|
/// The page versions must be appended in blknum order.
|
||||||
|
///
|
||||||
|
pub async fn put_image_raw(
|
||||||
|
&mut self,
|
||||||
|
key: Key,
|
||||||
|
raw_with_header: Bytes,
|
||||||
|
ctx: &RequestContext,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
self.inner
|
||||||
|
.as_mut()
|
||||||
|
.unwrap()
|
||||||
|
.put_image_raw(key, raw_with_header, ctx)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
/// Estimated size of the image layer.
|
/// Estimated size of the image layer.
|
||||||
pub(crate) fn estimated_size(&self) -> u64 {
|
pub(crate) fn estimated_size(&self) -> u64 {
|
||||||
let inner = self.inner.as_ref().unwrap();
|
let inner = self.inner.as_ref().unwrap();
|
||||||
@@ -1167,7 +1228,7 @@ mod test {
|
|||||||
use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
|
use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
|
||||||
use crate::tenant::storage_layer::{Layer, ResidentLayer};
|
use crate::tenant::storage_layer::{Layer, ResidentLayer};
|
||||||
use crate::tenant::vectored_blob_io::StreamingVectoredReadPlanner;
|
use crate::tenant::vectored_blob_io::StreamingVectoredReadPlanner;
|
||||||
use crate::tenant::{Tenant, Timeline};
|
use crate::tenant::{TenantShard, Timeline};
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn image_layer_rewrite() {
|
async fn image_layer_rewrite() {
|
||||||
@@ -1349,7 +1410,7 @@ mod test {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn produce_image_layer(
|
async fn produce_image_layer(
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
tline: &Arc<Timeline>,
|
tline: &Arc<Timeline>,
|
||||||
mut images: Vec<(Key, Bytes)>,
|
mut images: Vec<(Key, Bytes)>,
|
||||||
lsn: Lsn,
|
lsn: Lsn,
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ use crate::task_mgr::{self, BACKGROUND_RUNTIME, TOKIO_WORKER_THREADS, TaskKind};
|
|||||||
use crate::tenant::throttle::Stats;
|
use crate::tenant::throttle::Stats;
|
||||||
use crate::tenant::timeline::CompactionError;
|
use crate::tenant::timeline::CompactionError;
|
||||||
use crate::tenant::timeline::compaction::CompactionOutcome;
|
use crate::tenant::timeline::compaction::CompactionOutcome;
|
||||||
use crate::tenant::{Tenant, TenantState};
|
use crate::tenant::{TenantShard, TenantState};
|
||||||
|
|
||||||
/// Semaphore limiting concurrent background tasks (across all tenants).
|
/// Semaphore limiting concurrent background tasks (across all tenants).
|
||||||
///
|
///
|
||||||
@@ -117,7 +117,7 @@ pub(crate) async fn acquire_concurrency_permit(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Start per tenant background loops: compaction, GC, and ingest housekeeping.
|
/// Start per tenant background loops: compaction, GC, and ingest housekeeping.
|
||||||
pub fn start_background_loops(tenant: &Arc<Tenant>, can_start: Option<&Barrier>) {
|
pub fn start_background_loops(tenant: &Arc<TenantShard>, can_start: Option<&Barrier>) {
|
||||||
let tenant_shard_id = tenant.tenant_shard_id;
|
let tenant_shard_id = tenant.tenant_shard_id;
|
||||||
|
|
||||||
task_mgr::spawn(
|
task_mgr::spawn(
|
||||||
@@ -198,7 +198,7 @@ pub fn start_background_loops(tenant: &Arc<Tenant>, can_start: Option<&Barrier>)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Compaction task's main loop.
|
/// Compaction task's main loop.
|
||||||
async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
async fn compaction_loop(tenant: Arc<TenantShard>, cancel: CancellationToken) {
|
||||||
const BASE_BACKOFF_SECS: f64 = 1.0;
|
const BASE_BACKOFF_SECS: f64 = 1.0;
|
||||||
const MAX_BACKOFF_SECS: f64 = 300.0;
|
const MAX_BACKOFF_SECS: f64 = 300.0;
|
||||||
const RECHECK_CONFIG_INTERVAL: Duration = Duration::from_secs(10);
|
const RECHECK_CONFIG_INTERVAL: Duration = Duration::from_secs(10);
|
||||||
@@ -348,7 +348,7 @@ pub(crate) fn log_compaction_error(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// GC task's main loop.
|
/// GC task's main loop.
|
||||||
async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
async fn gc_loop(tenant: Arc<TenantShard>, cancel: CancellationToken) {
|
||||||
const MAX_BACKOFF_SECS: f64 = 300.0;
|
const MAX_BACKOFF_SECS: f64 = 300.0;
|
||||||
let mut error_run = 0; // consecutive errors
|
let mut error_run = 0; // consecutive errors
|
||||||
|
|
||||||
@@ -432,7 +432,7 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Tenant housekeeping's main loop.
|
/// Tenant housekeeping's main loop.
|
||||||
async fn tenant_housekeeping_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
async fn tenant_housekeeping_loop(tenant: Arc<TenantShard>, cancel: CancellationToken) {
|
||||||
let mut last_throttle_flag_reset_at = Instant::now();
|
let mut last_throttle_flag_reset_at = Instant::now();
|
||||||
loop {
|
loop {
|
||||||
if wait_for_active_tenant(&tenant, &cancel).await.is_break() {
|
if wait_for_active_tenant(&tenant, &cancel).await.is_break() {
|
||||||
@@ -483,7 +483,7 @@ async fn tenant_housekeeping_loop(tenant: Arc<Tenant>, cancel: CancellationToken
|
|||||||
|
|
||||||
/// Waits until the tenant becomes active, or returns `ControlFlow::Break()` to shut down.
|
/// Waits until the tenant becomes active, or returns `ControlFlow::Break()` to shut down.
|
||||||
async fn wait_for_active_tenant(
|
async fn wait_for_active_tenant(
|
||||||
tenant: &Arc<Tenant>,
|
tenant: &Arc<TenantShard>,
|
||||||
cancel: &CancellationToken,
|
cancel: &CancellationToken,
|
||||||
) -> ControlFlow<()> {
|
) -> ControlFlow<()> {
|
||||||
if tenant.current_state() == TenantState::Active {
|
if tenant.current_state() == TenantState::Active {
|
||||||
|
|||||||
@@ -412,7 +412,7 @@ pub struct Timeline {
|
|||||||
/// Timeline deletion will acquire both compaction and gc locks in whatever order.
|
/// Timeline deletion will acquire both compaction and gc locks in whatever order.
|
||||||
gc_lock: tokio::sync::Mutex<()>,
|
gc_lock: tokio::sync::Mutex<()>,
|
||||||
|
|
||||||
/// Cloned from [`super::Tenant::pagestream_throttle`] on construction.
|
/// Cloned from [`super::TenantShard::pagestream_throttle`] on construction.
|
||||||
pub(crate) pagestream_throttle: Arc<crate::tenant::throttle::Throttle>,
|
pub(crate) pagestream_throttle: Arc<crate::tenant::throttle::Throttle>,
|
||||||
|
|
||||||
/// Size estimator for aux file v2
|
/// Size estimator for aux file v2
|
||||||
@@ -2065,7 +2065,7 @@ impl Timeline {
|
|||||||
|
|
||||||
pub(crate) fn activate(
|
pub(crate) fn activate(
|
||||||
self: &Arc<Self>,
|
self: &Arc<Self>,
|
||||||
parent: Arc<crate::tenant::Tenant>,
|
parent: Arc<crate::tenant::TenantShard>,
|
||||||
broker_client: BrokerClientChannel,
|
broker_client: BrokerClientChannel,
|
||||||
background_jobs_can_start: Option<&completion::Barrier>,
|
background_jobs_can_start: Option<&completion::Barrier>,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
@@ -2702,6 +2702,14 @@ impl Timeline {
|
|||||||
.clone()
|
.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn get_compaction_shard_ancestor(&self) -> bool {
|
||||||
|
let tenant_conf = self.tenant_conf.load();
|
||||||
|
tenant_conf
|
||||||
|
.tenant_conf
|
||||||
|
.compaction_shard_ancestor
|
||||||
|
.unwrap_or(self.conf.default_tenant_conf.compaction_shard_ancestor)
|
||||||
|
}
|
||||||
|
|
||||||
fn get_eviction_policy(&self) -> EvictionPolicy {
|
fn get_eviction_policy(&self) -> EvictionPolicy {
|
||||||
let tenant_conf = self.tenant_conf.load();
|
let tenant_conf = self.tenant_conf.load();
|
||||||
tenant_conf
|
tenant_conf
|
||||||
@@ -3317,7 +3325,7 @@ impl Timeline {
|
|||||||
// (1) and (4)
|
// (1) and (4)
|
||||||
// TODO: this is basically a no-op now, should we remove it?
|
// TODO: this is basically a no-op now, should we remove it?
|
||||||
self.remote_client.schedule_barrier()?;
|
self.remote_client.schedule_barrier()?;
|
||||||
// Tenant::create_timeline will wait for these uploads to happen before returning, or
|
// TenantShard::create_timeline will wait for these uploads to happen before returning, or
|
||||||
// on retry.
|
// on retry.
|
||||||
|
|
||||||
// Now that we have the full layer map, we may calculate the visibility of layers within it (a global scan)
|
// Now that we have the full layer map, we may calculate the visibility of layers within it (a global scan)
|
||||||
@@ -5702,6 +5710,12 @@ impl Timeline {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if self.cancel.is_cancelled() {
|
||||||
|
// We already requested stopping the tenant, so we cannot wait for the logical size
|
||||||
|
// calculation to complete given the task might have been already cancelled.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if let Some(await_bg_cancel) = self
|
if let Some(await_bg_cancel) = self
|
||||||
.current_logical_size
|
.current_logical_size
|
||||||
.cancel_wait_for_background_loop_concurrency_limit_semaphore
|
.cancel_wait_for_background_loop_concurrency_limit_semaphore
|
||||||
@@ -5740,7 +5754,7 @@ impl Timeline {
|
|||||||
/// from our ancestor to be branches of this timeline.
|
/// from our ancestor to be branches of this timeline.
|
||||||
pub(crate) async fn prepare_to_detach_from_ancestor(
|
pub(crate) async fn prepare_to_detach_from_ancestor(
|
||||||
self: &Arc<Timeline>,
|
self: &Arc<Timeline>,
|
||||||
tenant: &crate::tenant::Tenant,
|
tenant: &crate::tenant::TenantShard,
|
||||||
options: detach_ancestor::Options,
|
options: detach_ancestor::Options,
|
||||||
behavior: DetachBehavior,
|
behavior: DetachBehavior,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
@@ -5759,7 +5773,7 @@ impl Timeline {
|
|||||||
/// resetting the tenant.
|
/// resetting the tenant.
|
||||||
pub(crate) async fn detach_from_ancestor_and_reparent(
|
pub(crate) async fn detach_from_ancestor_and_reparent(
|
||||||
self: &Arc<Timeline>,
|
self: &Arc<Timeline>,
|
||||||
tenant: &crate::tenant::Tenant,
|
tenant: &crate::tenant::TenantShard,
|
||||||
prepared: detach_ancestor::PreparedTimelineDetach,
|
prepared: detach_ancestor::PreparedTimelineDetach,
|
||||||
ancestor_timeline_id: TimelineId,
|
ancestor_timeline_id: TimelineId,
|
||||||
ancestor_lsn: Lsn,
|
ancestor_lsn: Lsn,
|
||||||
@@ -5783,7 +5797,7 @@ impl Timeline {
|
|||||||
/// The tenant must've been reset if ancestry was modified previously (in tenant manager).
|
/// The tenant must've been reset if ancestry was modified previously (in tenant manager).
|
||||||
pub(crate) async fn complete_detaching_timeline_ancestor(
|
pub(crate) async fn complete_detaching_timeline_ancestor(
|
||||||
self: &Arc<Timeline>,
|
self: &Arc<Timeline>,
|
||||||
tenant: &crate::tenant::Tenant,
|
tenant: &crate::tenant::TenantShard,
|
||||||
attempt: detach_ancestor::Attempt,
|
attempt: detach_ancestor::Attempt,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> Result<(), detach_ancestor::Error> {
|
) -> Result<(), detach_ancestor::Error> {
|
||||||
@@ -6845,14 +6859,14 @@ impl Timeline {
|
|||||||
/// Persistently blocks gc for `Manual` reason.
|
/// Persistently blocks gc for `Manual` reason.
|
||||||
///
|
///
|
||||||
/// Returns true if no such block existed before, false otherwise.
|
/// Returns true if no such block existed before, false otherwise.
|
||||||
pub(crate) async fn block_gc(&self, tenant: &super::Tenant) -> anyhow::Result<bool> {
|
pub(crate) async fn block_gc(&self, tenant: &super::TenantShard) -> anyhow::Result<bool> {
|
||||||
use crate::tenant::remote_timeline_client::index::GcBlockingReason;
|
use crate::tenant::remote_timeline_client::index::GcBlockingReason;
|
||||||
assert_eq!(self.tenant_shard_id, tenant.tenant_shard_id);
|
assert_eq!(self.tenant_shard_id, tenant.tenant_shard_id);
|
||||||
tenant.gc_block.insert(self, GcBlockingReason::Manual).await
|
tenant.gc_block.insert(self, GcBlockingReason::Manual).await
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Persistently unblocks gc for `Manual` reason.
|
/// Persistently unblocks gc for `Manual` reason.
|
||||||
pub(crate) async fn unblock_gc(&self, tenant: &super::Tenant) -> anyhow::Result<()> {
|
pub(crate) async fn unblock_gc(&self, tenant: &super::TenantShard) -> anyhow::Result<()> {
|
||||||
use crate::tenant::remote_timeline_client::index::GcBlockingReason;
|
use crate::tenant::remote_timeline_client::index::GcBlockingReason;
|
||||||
assert_eq!(self.tenant_shard_id, tenant.tenant_shard_id);
|
assert_eq!(self.tenant_shard_id, tenant.tenant_shard_id);
|
||||||
tenant.gc_block.remove(self, GcBlockingReason::Manual).await
|
tenant.gc_block.remove(self, GcBlockingReason::Manual).await
|
||||||
@@ -6870,8 +6884,8 @@ impl Timeline {
|
|||||||
|
|
||||||
/// Force create an image layer and place it into the layer map.
|
/// Force create an image layer and place it into the layer map.
|
||||||
///
|
///
|
||||||
/// DO NOT use this function directly. Use [`Tenant::branch_timeline_test_with_layers`]
|
/// DO NOT use this function directly. Use [`TenantShard::branch_timeline_test_with_layers`]
|
||||||
/// or [`Tenant::create_test_timeline_with_layers`] to ensure all these layers are
|
/// or [`TenantShard::create_test_timeline_with_layers`] to ensure all these layers are
|
||||||
/// placed into the layer map in one run AND be validated.
|
/// placed into the layer map in one run AND be validated.
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub(super) async fn force_create_image_layer(
|
pub(super) async fn force_create_image_layer(
|
||||||
@@ -6927,8 +6941,8 @@ impl Timeline {
|
|||||||
|
|
||||||
/// Force create a delta layer and place it into the layer map.
|
/// Force create a delta layer and place it into the layer map.
|
||||||
///
|
///
|
||||||
/// DO NOT use this function directly. Use [`Tenant::branch_timeline_test_with_layers`]
|
/// DO NOT use this function directly. Use [`TenantShard::branch_timeline_test_with_layers`]
|
||||||
/// or [`Tenant::create_test_timeline_with_layers`] to ensure all these layers are
|
/// or [`TenantShard::create_test_timeline_with_layers`] to ensure all these layers are
|
||||||
/// placed into the layer map in one run AND be validated.
|
/// placed into the layer map in one run AND be validated.
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub(super) async fn force_create_delta_layer(
|
pub(super) async fn force_create_delta_layer(
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
use std::collections::{BinaryHeap, HashMap, HashSet, VecDeque};
|
use std::collections::{BinaryHeap, HashMap, HashSet, VecDeque};
|
||||||
use std::ops::{Deref, Range};
|
use std::ops::{Deref, Range};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::{Duration, Instant};
|
use std::time::{Duration, Instant, SystemTime};
|
||||||
|
|
||||||
use super::layer_manager::LayerManager;
|
use super::layer_manager::LayerManager;
|
||||||
use super::{
|
use super::{
|
||||||
@@ -56,7 +56,8 @@ use crate::tenant::storage_layer::batch_split_writer::{
|
|||||||
use crate::tenant::storage_layer::filter_iterator::FilterIterator;
|
use crate::tenant::storage_layer::filter_iterator::FilterIterator;
|
||||||
use crate::tenant::storage_layer::merge_iterator::MergeIterator;
|
use crate::tenant::storage_layer::merge_iterator::MergeIterator;
|
||||||
use crate::tenant::storage_layer::{
|
use crate::tenant::storage_layer::{
|
||||||
AsLayerDesc, PersistentLayerDesc, PersistentLayerKey, ValueReconstructState,
|
AsLayerDesc, LayerVisibilityHint, PersistentLayerDesc, PersistentLayerKey,
|
||||||
|
ValueReconstructState,
|
||||||
};
|
};
|
||||||
use crate::tenant::tasks::log_compaction_error;
|
use crate::tenant::tasks::log_compaction_error;
|
||||||
use crate::tenant::timeline::{
|
use crate::tenant::timeline::{
|
||||||
@@ -69,7 +70,14 @@ use crate::virtual_file::{MaybeFatalIo, VirtualFile};
|
|||||||
/// Maximum number of deltas before generating an image layer in bottom-most compaction.
|
/// Maximum number of deltas before generating an image layer in bottom-most compaction.
|
||||||
const COMPACTION_DELTA_THRESHOLD: usize = 5;
|
const COMPACTION_DELTA_THRESHOLD: usize = 5;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
|
/// Ratio of shard-local pages below which we trigger shard ancestor layer rewrites. 0.3 means that
|
||||||
|
/// <= 30% of layer pages must belong to the descendant shard to rewrite the layer.
|
||||||
|
///
|
||||||
|
/// We choose a value < 0.5 to avoid rewriting all visible layers every time we do a power-of-two
|
||||||
|
/// shard split, which gets expensive for large tenants.
|
||||||
|
const ANCESTOR_COMPACTION_REWRITE_THRESHOLD: f64 = 0.3;
|
||||||
|
|
||||||
|
#[derive(Default, Debug, Clone, Copy, Hash, PartialEq, Eq, Serialize)]
|
||||||
pub struct GcCompactionJobId(pub usize);
|
pub struct GcCompactionJobId(pub usize);
|
||||||
|
|
||||||
impl std::fmt::Display for GcCompactionJobId {
|
impl std::fmt::Display for GcCompactionJobId {
|
||||||
@@ -97,6 +105,43 @@ pub enum GcCompactionQueueItem {
|
|||||||
Notify(GcCompactionJobId, Option<Lsn>),
|
Notify(GcCompactionJobId, Option<Lsn>),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Statistics for gc-compaction meta jobs, which contains several sub compaction jobs.
|
||||||
|
#[derive(Debug, Clone, Serialize, Default)]
|
||||||
|
pub struct GcCompactionMetaStatistics {
|
||||||
|
/// The total number of sub compaction jobs.
|
||||||
|
pub total_sub_compaction_jobs: usize,
|
||||||
|
/// The total number of sub compaction jobs that failed.
|
||||||
|
pub failed_sub_compaction_jobs: usize,
|
||||||
|
/// The total number of sub compaction jobs that succeeded.
|
||||||
|
pub succeeded_sub_compaction_jobs: usize,
|
||||||
|
/// The layer size before compaction.
|
||||||
|
pub before_compaction_layer_size: u64,
|
||||||
|
/// The layer size after compaction.
|
||||||
|
pub after_compaction_layer_size: u64,
|
||||||
|
/// The start time of the meta job.
|
||||||
|
pub start_time: Option<SystemTime>,
|
||||||
|
/// The end time of the meta job.
|
||||||
|
pub end_time: Option<SystemTime>,
|
||||||
|
/// The duration of the meta job.
|
||||||
|
pub duration_secs: f64,
|
||||||
|
/// The id of the meta job.
|
||||||
|
pub meta_job_id: GcCompactionJobId,
|
||||||
|
/// The LSN below which the layers are compacted, used to compute the statistics.
|
||||||
|
pub below_lsn: Lsn,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl GcCompactionMetaStatistics {
|
||||||
|
fn finalize(&mut self) {
|
||||||
|
let end_time = SystemTime::now();
|
||||||
|
if let Some(start_time) = self.start_time {
|
||||||
|
if let Ok(duration) = end_time.duration_since(start_time) {
|
||||||
|
self.duration_secs = duration.as_secs_f64();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self.end_time = Some(end_time);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl GcCompactionQueueItem {
|
impl GcCompactionQueueItem {
|
||||||
pub fn into_compact_info_resp(
|
pub fn into_compact_info_resp(
|
||||||
self,
|
self,
|
||||||
@@ -134,6 +179,7 @@ struct GcCompactionQueueInner {
|
|||||||
queued: VecDeque<(GcCompactionJobId, GcCompactionQueueItem)>,
|
queued: VecDeque<(GcCompactionJobId, GcCompactionQueueItem)>,
|
||||||
guards: HashMap<GcCompactionJobId, GcCompactionGuardItems>,
|
guards: HashMap<GcCompactionJobId, GcCompactionGuardItems>,
|
||||||
last_id: GcCompactionJobId,
|
last_id: GcCompactionJobId,
|
||||||
|
meta_statistics: Option<GcCompactionMetaStatistics>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl GcCompactionQueueInner {
|
impl GcCompactionQueueInner {
|
||||||
@@ -165,6 +211,7 @@ impl GcCompactionQueue {
|
|||||||
queued: VecDeque::new(),
|
queued: VecDeque::new(),
|
||||||
guards: HashMap::new(),
|
guards: HashMap::new(),
|
||||||
last_id: GcCompactionJobId(0),
|
last_id: GcCompactionJobId(0),
|
||||||
|
meta_statistics: None,
|
||||||
}),
|
}),
|
||||||
consumer_lock: tokio::sync::Mutex::new(()),
|
consumer_lock: tokio::sync::Mutex::new(()),
|
||||||
}
|
}
|
||||||
@@ -349,6 +396,23 @@ impl GcCompactionQueue {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn collect_layer_below_lsn(
|
||||||
|
&self,
|
||||||
|
timeline: &Arc<Timeline>,
|
||||||
|
lsn: Lsn,
|
||||||
|
) -> Result<u64, CompactionError> {
|
||||||
|
let guard = timeline.layers.read().await;
|
||||||
|
let layer_map = guard.layer_map()?;
|
||||||
|
let layers = layer_map.iter_historic_layers().collect_vec();
|
||||||
|
let mut size = 0;
|
||||||
|
for layer in layers {
|
||||||
|
if layer.lsn_range.start <= lsn {
|
||||||
|
size += layer.file_size();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(size)
|
||||||
|
}
|
||||||
|
|
||||||
/// Notify the caller the job has finished and unblock GC.
|
/// Notify the caller the job has finished and unblock GC.
|
||||||
fn notify_and_unblock(&self, id: GcCompactionJobId) {
|
fn notify_and_unblock(&self, id: GcCompactionJobId) {
|
||||||
info!("compaction job id={} finished", id);
|
info!("compaction job id={} finished", id);
|
||||||
@@ -358,6 +422,16 @@ impl GcCompactionQueue {
|
|||||||
let _ = tx.send(());
|
let _ = tx.send(());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if let Some(ref meta_statistics) = guard.meta_statistics {
|
||||||
|
if meta_statistics.meta_job_id == id {
|
||||||
|
if let Ok(stats) = serde_json::to_string(&meta_statistics) {
|
||||||
|
info!(
|
||||||
|
"gc-compaction meta statistics for job id = {}: {}",
|
||||||
|
id, stats
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn clear_running_job(&self) {
|
fn clear_running_job(&self) {
|
||||||
@@ -397,7 +471,11 @@ impl GcCompactionQueue {
|
|||||||
let mut pending_tasks = Vec::new();
|
let mut pending_tasks = Vec::new();
|
||||||
// gc-compaction might pick more layers or fewer layers to compact. The L2 LSN does not need to be accurate.
|
// gc-compaction might pick more layers or fewer layers to compact. The L2 LSN does not need to be accurate.
|
||||||
// And therefore, we simply assume the maximum LSN of all jobs is the expected L2 LSN.
|
// And therefore, we simply assume the maximum LSN of all jobs is the expected L2 LSN.
|
||||||
let expected_l2_lsn = jobs.iter().map(|job| job.compact_lsn_range.end).max();
|
let expected_l2_lsn = jobs
|
||||||
|
.iter()
|
||||||
|
.map(|job| job.compact_lsn_range.end)
|
||||||
|
.max()
|
||||||
|
.unwrap();
|
||||||
for job in jobs {
|
for job in jobs {
|
||||||
// Unfortunately we need to convert the `GcCompactJob` back to `CompactionOptions`
|
// Unfortunately we need to convert the `GcCompactJob` back to `CompactionOptions`
|
||||||
// until we do further refactors to allow directly call `compact_with_gc`.
|
// until we do further refactors to allow directly call `compact_with_gc`.
|
||||||
@@ -422,9 +500,13 @@ impl GcCompactionQueue {
|
|||||||
if !auto {
|
if !auto {
|
||||||
pending_tasks.push(GcCompactionQueueItem::Notify(id, None));
|
pending_tasks.push(GcCompactionQueueItem::Notify(id, None));
|
||||||
} else {
|
} else {
|
||||||
pending_tasks.push(GcCompactionQueueItem::Notify(id, expected_l2_lsn));
|
pending_tasks.push(GcCompactionQueueItem::Notify(id, Some(expected_l2_lsn)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let layer_size = self
|
||||||
|
.collect_layer_below_lsn(timeline, expected_l2_lsn)
|
||||||
|
.await?;
|
||||||
|
|
||||||
{
|
{
|
||||||
let mut guard = self.inner.lock().unwrap();
|
let mut guard = self.inner.lock().unwrap();
|
||||||
let mut tasks = Vec::new();
|
let mut tasks = Vec::new();
|
||||||
@@ -436,7 +518,16 @@ impl GcCompactionQueue {
|
|||||||
for item in tasks {
|
for item in tasks {
|
||||||
guard.queued.push_front(item);
|
guard.queued.push_front(item);
|
||||||
}
|
}
|
||||||
|
guard.meta_statistics = Some(GcCompactionMetaStatistics {
|
||||||
|
meta_job_id: id,
|
||||||
|
start_time: Some(SystemTime::now()),
|
||||||
|
before_compaction_layer_size: layer_size,
|
||||||
|
below_lsn: expected_l2_lsn,
|
||||||
|
total_sub_compaction_jobs: jobs_len,
|
||||||
|
..Default::default()
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
info!(
|
info!(
|
||||||
"scheduled enhanced gc bottom-most compaction with sub-compaction, split into {} jobs",
|
"scheduled enhanced gc bottom-most compaction with sub-compaction, split into {} jobs",
|
||||||
jobs_len
|
jobs_len
|
||||||
@@ -565,6 +656,10 @@ impl GcCompactionQueue {
|
|||||||
Err(err) => {
|
Err(err) => {
|
||||||
warn!(%err, "failed to run gc-compaction subcompaction job");
|
warn!(%err, "failed to run gc-compaction subcompaction job");
|
||||||
self.clear_running_job();
|
self.clear_running_job();
|
||||||
|
let mut guard = self.inner.lock().unwrap();
|
||||||
|
if let Some(ref mut meta_statistics) = guard.meta_statistics {
|
||||||
|
meta_statistics.failed_sub_compaction_jobs += 1;
|
||||||
|
}
|
||||||
return Err(err);
|
return Err(err);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -574,8 +669,34 @@ impl GcCompactionQueue {
|
|||||||
// we need to clean things up before returning from the function.
|
// we need to clean things up before returning from the function.
|
||||||
yield_for_l0 = true;
|
yield_for_l0 = true;
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
let mut guard = self.inner.lock().unwrap();
|
||||||
|
if let Some(ref mut meta_statistics) = guard.meta_statistics {
|
||||||
|
meta_statistics.succeeded_sub_compaction_jobs += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
GcCompactionQueueItem::Notify(id, l2_lsn) => {
|
GcCompactionQueueItem::Notify(id, l2_lsn) => {
|
||||||
|
let below_lsn = {
|
||||||
|
let mut guard = self.inner.lock().unwrap();
|
||||||
|
if let Some(ref mut meta_statistics) = guard.meta_statistics {
|
||||||
|
meta_statistics.below_lsn
|
||||||
|
} else {
|
||||||
|
Lsn::INVALID
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let layer_size = if below_lsn != Lsn::INVALID {
|
||||||
|
self.collect_layer_below_lsn(timeline, below_lsn).await?
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
{
|
||||||
|
let mut guard = self.inner.lock().unwrap();
|
||||||
|
if let Some(ref mut meta_statistics) = guard.meta_statistics {
|
||||||
|
meta_statistics.after_compaction_layer_size = layer_size;
|
||||||
|
meta_statistics.finalize();
|
||||||
|
}
|
||||||
|
}
|
||||||
self.notify_and_unblock(id);
|
self.notify_and_unblock(id);
|
||||||
if let Some(l2_lsn) = l2_lsn {
|
if let Some(l2_lsn) = l2_lsn {
|
||||||
let current_l2_lsn = timeline
|
let current_l2_lsn = timeline
|
||||||
@@ -819,7 +940,15 @@ impl KeyHistoryRetention {
|
|||||||
base_img: &Option<(Lsn, &Bytes)>,
|
base_img: &Option<(Lsn, &Bytes)>,
|
||||||
history: &[(Lsn, &NeonWalRecord)],
|
history: &[(Lsn, &NeonWalRecord)],
|
||||||
tline: &Arc<Timeline>,
|
tline: &Arc<Timeline>,
|
||||||
|
skip_empty: bool,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
|
if base_img.is_none() && history.is_empty() {
|
||||||
|
if skip_empty {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
anyhow::bail!("verification failed: key {} has no history at {}", key, lsn);
|
||||||
|
};
|
||||||
|
|
||||||
let mut records = history
|
let mut records = history
|
||||||
.iter()
|
.iter()
|
||||||
.map(|(lsn, val)| (*lsn, (*val).clone()))
|
.map(|(lsn, val)| (*lsn, (*val).clone()))
|
||||||
@@ -860,17 +989,12 @@ impl KeyHistoryRetention {
|
|||||||
if *retain_lsn >= min_lsn {
|
if *retain_lsn >= min_lsn {
|
||||||
// Only verify after the key appears in the full history for the first time.
|
// Only verify after the key appears in the full history for the first time.
|
||||||
|
|
||||||
if base_img.is_none() && history.is_empty() {
|
|
||||||
anyhow::bail!(
|
|
||||||
"verificatoin failed: key {} has no history at {}",
|
|
||||||
key,
|
|
||||||
retain_lsn
|
|
||||||
);
|
|
||||||
};
|
|
||||||
// We don't modify history: in theory, we could replace the history with a single
|
// We don't modify history: in theory, we could replace the history with a single
|
||||||
// image as in `generate_key_retention` to make redos at later LSNs faster. But we
|
// image as in `generate_key_retention` to make redos at later LSNs faster. But we
|
||||||
// want to verify everything as if they are read from the real layer map.
|
// want to verify everything as if they are read from the real layer map.
|
||||||
collect_and_verify(key, *retain_lsn, &base_img, &history, tline).await?;
|
collect_and_verify(key, *retain_lsn, &base_img, &history, tline, false)
|
||||||
|
.await
|
||||||
|
.context("below horizon retain_lsn")?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -878,13 +1002,17 @@ impl KeyHistoryRetention {
|
|||||||
match val {
|
match val {
|
||||||
Value::Image(img) => {
|
Value::Image(img) => {
|
||||||
// Above the GC horizon, we verify every time we see an image.
|
// Above the GC horizon, we verify every time we see an image.
|
||||||
collect_and_verify(key, *lsn, &base_img, &history, tline).await?;
|
collect_and_verify(key, *lsn, &base_img, &history, tline, true)
|
||||||
|
.await
|
||||||
|
.context("above horizon full image")?;
|
||||||
base_img = Some((*lsn, img));
|
base_img = Some((*lsn, img));
|
||||||
history.clear();
|
history.clear();
|
||||||
}
|
}
|
||||||
Value::WalRecord(rec) if val.will_init() => {
|
Value::WalRecord(rec) if val.will_init() => {
|
||||||
// Above the GC horizon, we verify every time we see an init record.
|
// Above the GC horizon, we verify every time we see an init record.
|
||||||
collect_and_verify(key, *lsn, &base_img, &history, tline).await?;
|
collect_and_verify(key, *lsn, &base_img, &history, tline, true)
|
||||||
|
.await
|
||||||
|
.context("above horizon init record")?;
|
||||||
base_img = None;
|
base_img = None;
|
||||||
history.clear();
|
history.clear();
|
||||||
history.push((*lsn, rec));
|
history.push((*lsn, rec));
|
||||||
@@ -895,7 +1023,9 @@ impl KeyHistoryRetention {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Ensure the latest record is readable.
|
// Ensure the latest record is readable.
|
||||||
collect_and_verify(key, max_lsn, &base_img, &history, tline).await?;
|
collect_and_verify(key, max_lsn, &base_img, &history, tline, false)
|
||||||
|
.await
|
||||||
|
.context("latest record")?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1222,8 +1352,7 @@ impl Timeline {
|
|||||||
let partition_count = self.partitioning.read().0.0.parts.len();
|
let partition_count = self.partitioning.read().0.0.parts.len();
|
||||||
|
|
||||||
// 4. Shard ancestor compaction
|
// 4. Shard ancestor compaction
|
||||||
|
if self.get_compaction_shard_ancestor() && self.shard_identity.count >= ShardCount::new(2) {
|
||||||
if self.shard_identity.count >= ShardCount::new(2) {
|
|
||||||
// Limit the number of layer rewrites to the number of partitions: this means its
|
// Limit the number of layer rewrites to the number of partitions: this means its
|
||||||
// runtime should be comparable to a full round of image layer creations, rather than
|
// runtime should be comparable to a full round of image layer creations, rather than
|
||||||
// being potentially much longer.
|
// being potentially much longer.
|
||||||
@@ -1273,7 +1402,10 @@ impl Timeline {
|
|||||||
let pitr_cutoff = self.gc_info.read().unwrap().cutoffs.time;
|
let pitr_cutoff = self.gc_info.read().unwrap().cutoffs.time;
|
||||||
|
|
||||||
let layers = self.layers.read().await;
|
let layers = self.layers.read().await;
|
||||||
for layer_desc in layers.layer_map()?.iter_historic_layers() {
|
let layers_iter = layers.layer_map()?.iter_historic_layers();
|
||||||
|
let (layers_total, mut layers_checked) = (layers_iter.len(), 0);
|
||||||
|
for layer_desc in layers_iter {
|
||||||
|
layers_checked += 1;
|
||||||
let layer = layers.get_from_desc(&layer_desc);
|
let layer = layers.get_from_desc(&layer_desc);
|
||||||
if layer.metadata().shard.shard_count == self.shard_identity.count {
|
if layer.metadata().shard.shard_count == self.shard_identity.count {
|
||||||
// This layer does not belong to a historic ancestor, no need to re-image it.
|
// This layer does not belong to a historic ancestor, no need to re-image it.
|
||||||
@@ -1317,14 +1449,15 @@ impl Timeline {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Don't bother re-writing a layer unless it will at least halve its size
|
// Only rewrite a layer if we can reclaim significant space.
|
||||||
if layer_local_page_count != u32::MAX
|
if layer_local_page_count != u32::MAX
|
||||||
&& layer_local_page_count > layer_raw_page_count / 2
|
&& layer_local_page_count as f64 / layer_raw_page_count as f64
|
||||||
|
<= ANCESTOR_COMPACTION_REWRITE_THRESHOLD
|
||||||
{
|
{
|
||||||
debug!(%layer,
|
debug!(%layer,
|
||||||
"layer is already mostly local ({}/{}), not rewriting",
|
"layer has a large share of local pages \
|
||||||
layer_local_page_count,
|
({layer_local_page_count}/{layer_raw_page_count} > \
|
||||||
layer_raw_page_count
|
{ANCESTOR_COMPACTION_REWRITE_THRESHOLD}), not rewriting",
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1336,12 +1469,19 @@ impl Timeline {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We do not yet implement rewrite of delta layers.
|
||||||
if layer_desc.is_delta() {
|
if layer_desc.is_delta() {
|
||||||
// We do not yet implement rewrite of delta layers
|
|
||||||
debug!(%layer, "Skipping rewrite of delta layer");
|
debug!(%layer, "Skipping rewrite of delta layer");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We don't bother rewriting layers that aren't visible, since these won't be needed by
|
||||||
|
// reads and will likely be garbage collected soon.
|
||||||
|
if layer.visibility() != LayerVisibilityHint::Visible {
|
||||||
|
debug!(%layer, "Skipping rewrite of invisible layer");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// Only rewrite layers if their generations differ. This guarantees:
|
// Only rewrite layers if their generations differ. This guarantees:
|
||||||
// - that local rewrite is safe, as local layer paths will differ between existing layer and rewritten one
|
// - that local rewrite is safe, as local layer paths will differ between existing layer and rewritten one
|
||||||
// - that the layer is persistent in remote storage, as we only see old-generation'd layer via loading from remote storage
|
// - that the layer is persistent in remote storage, as we only see old-generation'd layer via loading from remote storage
|
||||||
@@ -1371,7 +1511,8 @@ impl Timeline {
|
|||||||
}
|
}
|
||||||
|
|
||||||
info!(
|
info!(
|
||||||
"starting shard ancestor compaction, rewriting {} layers and dropping {} layers \
|
"starting shard ancestor compaction, rewriting {} layers and dropping {} layers, \
|
||||||
|
checked {layers_checked}/{layers_total} layers \
|
||||||
(latest_gc_cutoff={} pitr_cutoff={})",
|
(latest_gc_cutoff={} pitr_cutoff={})",
|
||||||
layers_to_rewrite.len(),
|
layers_to_rewrite.len(),
|
||||||
drop_layers.len(),
|
drop_layers.len(),
|
||||||
|
|||||||
@@ -18,8 +18,8 @@ use crate::tenant::remote_timeline_client::{
|
|||||||
PersistIndexPartWithDeletedFlagError, RemoteTimelineClient,
|
PersistIndexPartWithDeletedFlagError, RemoteTimelineClient,
|
||||||
};
|
};
|
||||||
use crate::tenant::{
|
use crate::tenant::{
|
||||||
CreateTimelineCause, DeleteTimelineError, MaybeDeletedIndexPart, Tenant, TenantManifestError,
|
CreateTimelineCause, DeleteTimelineError, MaybeDeletedIndexPart, TenantManifestError,
|
||||||
Timeline, TimelineOrOffloaded,
|
TenantShard, Timeline, TimelineOrOffloaded,
|
||||||
};
|
};
|
||||||
use crate::virtual_file::MaybeFatalIo;
|
use crate::virtual_file::MaybeFatalIo;
|
||||||
|
|
||||||
@@ -113,7 +113,7 @@ pub(super) async fn delete_local_timeline_directory(
|
|||||||
/// It is important that this gets called when DeletionGuard is being held.
|
/// It is important that this gets called when DeletionGuard is being held.
|
||||||
/// For more context see comments in [`make_timeline_delete_guard`]
|
/// For more context see comments in [`make_timeline_delete_guard`]
|
||||||
async fn remove_maybe_offloaded_timeline_from_tenant(
|
async fn remove_maybe_offloaded_timeline_from_tenant(
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
timeline: &TimelineOrOffloaded,
|
timeline: &TimelineOrOffloaded,
|
||||||
_: &DeletionGuard, // using it as a witness
|
_: &DeletionGuard, // using it as a witness
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
@@ -192,7 +192,7 @@ impl DeleteTimelineFlow {
|
|||||||
// error out if some of the shutdown tasks have already been completed!
|
// error out if some of the shutdown tasks have already been completed!
|
||||||
#[instrument(skip_all)]
|
#[instrument(skip_all)]
|
||||||
pub async fn run(
|
pub async fn run(
|
||||||
tenant: &Arc<Tenant>,
|
tenant: &Arc<TenantShard>,
|
||||||
timeline_id: TimelineId,
|
timeline_id: TimelineId,
|
||||||
) -> Result<(), DeleteTimelineError> {
|
) -> Result<(), DeleteTimelineError> {
|
||||||
super::debug_assert_current_span_has_tenant_and_timeline_id();
|
super::debug_assert_current_span_has_tenant_and_timeline_id();
|
||||||
@@ -288,7 +288,7 @@ impl DeleteTimelineFlow {
|
|||||||
/// Shortcut to create Timeline in stopping state and spawn deletion task.
|
/// Shortcut to create Timeline in stopping state and spawn deletion task.
|
||||||
#[instrument(skip_all, fields(%timeline_id))]
|
#[instrument(skip_all, fields(%timeline_id))]
|
||||||
pub(crate) async fn resume_deletion(
|
pub(crate) async fn resume_deletion(
|
||||||
tenant: Arc<Tenant>,
|
tenant: Arc<TenantShard>,
|
||||||
timeline_id: TimelineId,
|
timeline_id: TimelineId,
|
||||||
local_metadata: &TimelineMetadata,
|
local_metadata: &TimelineMetadata,
|
||||||
remote_client: RemoteTimelineClient,
|
remote_client: RemoteTimelineClient,
|
||||||
@@ -338,7 +338,7 @@ impl DeleteTimelineFlow {
|
|||||||
fn schedule_background(
|
fn schedule_background(
|
||||||
guard: DeletionGuard,
|
guard: DeletionGuard,
|
||||||
conf: &'static PageServerConf,
|
conf: &'static PageServerConf,
|
||||||
tenant: Arc<Tenant>,
|
tenant: Arc<TenantShard>,
|
||||||
timeline: TimelineOrOffloaded,
|
timeline: TimelineOrOffloaded,
|
||||||
remote_client: Arc<RemoteTimelineClient>,
|
remote_client: Arc<RemoteTimelineClient>,
|
||||||
) {
|
) {
|
||||||
@@ -381,7 +381,7 @@ impl DeleteTimelineFlow {
|
|||||||
async fn background(
|
async fn background(
|
||||||
mut guard: DeletionGuard,
|
mut guard: DeletionGuard,
|
||||||
conf: &PageServerConf,
|
conf: &PageServerConf,
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
timeline: &TimelineOrOffloaded,
|
timeline: &TimelineOrOffloaded,
|
||||||
remote_client: Arc<RemoteTimelineClient>,
|
remote_client: Arc<RemoteTimelineClient>,
|
||||||
) -> Result<(), DeleteTimelineError> {
|
) -> Result<(), DeleteTimelineError> {
|
||||||
@@ -435,7 +435,7 @@ pub(super) enum TimelineDeleteGuardKind {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub(super) fn make_timeline_delete_guard(
|
pub(super) fn make_timeline_delete_guard(
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
timeline_id: TimelineId,
|
timeline_id: TimelineId,
|
||||||
guard_kind: TimelineDeleteGuardKind,
|
guard_kind: TimelineDeleteGuardKind,
|
||||||
) -> Result<(TimelineOrOffloaded, DeletionGuard), DeleteTimelineError> {
|
) -> Result<(TimelineOrOffloaded, DeletionGuard), DeleteTimelineError> {
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ use super::layer_manager::LayerManager;
|
|||||||
use super::{FlushLayerError, Timeline};
|
use super::{FlushLayerError, Timeline};
|
||||||
use crate::context::{DownloadBehavior, RequestContext};
|
use crate::context::{DownloadBehavior, RequestContext};
|
||||||
use crate::task_mgr::TaskKind;
|
use crate::task_mgr::TaskKind;
|
||||||
use crate::tenant::Tenant;
|
use crate::tenant::TenantShard;
|
||||||
use crate::tenant::remote_timeline_client::index::GcBlockingReason::DetachAncestor;
|
use crate::tenant::remote_timeline_client::index::GcBlockingReason::DetachAncestor;
|
||||||
use crate::tenant::storage_layer::layer::local_layer_path;
|
use crate::tenant::storage_layer::layer::local_layer_path;
|
||||||
use crate::tenant::storage_layer::{
|
use crate::tenant::storage_layer::{
|
||||||
@@ -265,7 +265,7 @@ async fn generate_tombstone_image_layer(
|
|||||||
/// See [`Timeline::prepare_to_detach_from_ancestor`]
|
/// See [`Timeline::prepare_to_detach_from_ancestor`]
|
||||||
pub(super) async fn prepare(
|
pub(super) async fn prepare(
|
||||||
detached: &Arc<Timeline>,
|
detached: &Arc<Timeline>,
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
behavior: DetachBehavior,
|
behavior: DetachBehavior,
|
||||||
options: Options,
|
options: Options,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
@@ -590,7 +590,7 @@ pub(super) async fn prepare(
|
|||||||
|
|
||||||
async fn start_new_attempt(
|
async fn start_new_attempt(
|
||||||
detached: &Timeline,
|
detached: &Timeline,
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
ancestor_timeline_id: TimelineId,
|
ancestor_timeline_id: TimelineId,
|
||||||
ancestor_lsn: Lsn,
|
ancestor_lsn: Lsn,
|
||||||
) -> Result<Attempt, Error> {
|
) -> Result<Attempt, Error> {
|
||||||
@@ -611,7 +611,7 @@ async fn start_new_attempt(
|
|||||||
|
|
||||||
async fn continue_with_blocked_gc(
|
async fn continue_with_blocked_gc(
|
||||||
detached: &Timeline,
|
detached: &Timeline,
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
ancestor_timeline_id: TimelineId,
|
ancestor_timeline_id: TimelineId,
|
||||||
ancestor_lsn: Lsn,
|
ancestor_lsn: Lsn,
|
||||||
) -> Result<Attempt, Error> {
|
) -> Result<Attempt, Error> {
|
||||||
@@ -622,7 +622,7 @@ async fn continue_with_blocked_gc(
|
|||||||
|
|
||||||
fn obtain_exclusive_attempt(
|
fn obtain_exclusive_attempt(
|
||||||
detached: &Timeline,
|
detached: &Timeline,
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
ancestor_timeline_id: TimelineId,
|
ancestor_timeline_id: TimelineId,
|
||||||
ancestor_lsn: Lsn,
|
ancestor_lsn: Lsn,
|
||||||
) -> Result<Attempt, Error> {
|
) -> Result<Attempt, Error> {
|
||||||
@@ -655,7 +655,7 @@ fn obtain_exclusive_attempt(
|
|||||||
|
|
||||||
fn reparented_direct_children(
|
fn reparented_direct_children(
|
||||||
detached: &Arc<Timeline>,
|
detached: &Arc<Timeline>,
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
) -> Result<HashSet<TimelineId>, Error> {
|
) -> Result<HashSet<TimelineId>, Error> {
|
||||||
let mut all_direct_children = tenant
|
let mut all_direct_children = tenant
|
||||||
.timelines
|
.timelines
|
||||||
@@ -950,7 +950,7 @@ impl DetachingAndReparenting {
|
|||||||
/// See [`Timeline::detach_from_ancestor_and_reparent`].
|
/// See [`Timeline::detach_from_ancestor_and_reparent`].
|
||||||
pub(super) async fn detach_and_reparent(
|
pub(super) async fn detach_and_reparent(
|
||||||
detached: &Arc<Timeline>,
|
detached: &Arc<Timeline>,
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
prepared: PreparedTimelineDetach,
|
prepared: PreparedTimelineDetach,
|
||||||
ancestor_timeline_id: TimelineId,
|
ancestor_timeline_id: TimelineId,
|
||||||
ancestor_lsn: Lsn,
|
ancestor_lsn: Lsn,
|
||||||
@@ -1184,7 +1184,7 @@ pub(super) async fn detach_and_reparent(
|
|||||||
|
|
||||||
pub(super) async fn complete(
|
pub(super) async fn complete(
|
||||||
detached: &Arc<Timeline>,
|
detached: &Arc<Timeline>,
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
mut attempt: Attempt,
|
mut attempt: Attempt,
|
||||||
_ctx: &RequestContext,
|
_ctx: &RequestContext,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
@@ -1258,7 +1258,7 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn check_no_archived_children_of_ancestor(
|
fn check_no_archived_children_of_ancestor(
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
detached: &Arc<Timeline>,
|
detached: &Arc<Timeline>,
|
||||||
ancestor: &Arc<Timeline>,
|
ancestor: &Arc<Timeline>,
|
||||||
ancestor_lsn: Lsn,
|
ancestor_lsn: Lsn,
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ use crate::tenant::size::CalculateSyntheticSizeError;
|
|||||||
use crate::tenant::storage_layer::LayerVisibilityHint;
|
use crate::tenant::storage_layer::LayerVisibilityHint;
|
||||||
use crate::tenant::tasks::{BackgroundLoopKind, BackgroundLoopSemaphorePermit, sleep_random};
|
use crate::tenant::tasks::{BackgroundLoopKind, BackgroundLoopSemaphorePermit, sleep_random};
|
||||||
use crate::tenant::timeline::EvictionError;
|
use crate::tenant::timeline::EvictionError;
|
||||||
use crate::tenant::{LogicalSizeCalculationCause, Tenant};
|
use crate::tenant::{LogicalSizeCalculationCause, TenantShard};
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct EvictionTaskTimelineState {
|
pub struct EvictionTaskTimelineState {
|
||||||
@@ -48,7 +48,7 @@ pub struct EvictionTaskTenantState {
|
|||||||
impl Timeline {
|
impl Timeline {
|
||||||
pub(super) fn launch_eviction_task(
|
pub(super) fn launch_eviction_task(
|
||||||
self: &Arc<Self>,
|
self: &Arc<Self>,
|
||||||
parent: Arc<Tenant>,
|
parent: Arc<TenantShard>,
|
||||||
background_tasks_can_start: Option<&completion::Barrier>,
|
background_tasks_can_start: Option<&completion::Barrier>,
|
||||||
) {
|
) {
|
||||||
let self_clone = Arc::clone(self);
|
let self_clone = Arc::clone(self);
|
||||||
@@ -75,7 +75,7 @@ impl Timeline {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[instrument(skip_all, fields(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id))]
|
#[instrument(skip_all, fields(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id))]
|
||||||
async fn eviction_task(self: Arc<Self>, tenant: Arc<Tenant>) {
|
async fn eviction_task(self: Arc<Self>, tenant: Arc<TenantShard>) {
|
||||||
// acquire the gate guard only once within a useful span
|
// acquire the gate guard only once within a useful span
|
||||||
let Ok(guard) = self.gate.enter() else {
|
let Ok(guard) = self.gate.enter() else {
|
||||||
return;
|
return;
|
||||||
@@ -118,7 +118,7 @@ impl Timeline {
|
|||||||
#[instrument(skip_all, fields(policy_kind = policy.discriminant_str()))]
|
#[instrument(skip_all, fields(policy_kind = policy.discriminant_str()))]
|
||||||
async fn eviction_iteration(
|
async fn eviction_iteration(
|
||||||
self: &Arc<Self>,
|
self: &Arc<Self>,
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
policy: &EvictionPolicy,
|
policy: &EvictionPolicy,
|
||||||
cancel: &CancellationToken,
|
cancel: &CancellationToken,
|
||||||
gate: &GateGuard,
|
gate: &GateGuard,
|
||||||
@@ -175,7 +175,7 @@ impl Timeline {
|
|||||||
|
|
||||||
async fn eviction_iteration_threshold(
|
async fn eviction_iteration_threshold(
|
||||||
self: &Arc<Self>,
|
self: &Arc<Self>,
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
p: &EvictionPolicyLayerAccessThreshold,
|
p: &EvictionPolicyLayerAccessThreshold,
|
||||||
cancel: &CancellationToken,
|
cancel: &CancellationToken,
|
||||||
gate: &GateGuard,
|
gate: &GateGuard,
|
||||||
@@ -309,7 +309,7 @@ impl Timeline {
|
|||||||
/// disk usage based eviction task.
|
/// disk usage based eviction task.
|
||||||
async fn imitiate_only(
|
async fn imitiate_only(
|
||||||
self: &Arc<Self>,
|
self: &Arc<Self>,
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
p: &EvictionPolicyLayerAccessThreshold,
|
p: &EvictionPolicyLayerAccessThreshold,
|
||||||
cancel: &CancellationToken,
|
cancel: &CancellationToken,
|
||||||
gate: &GateGuard,
|
gate: &GateGuard,
|
||||||
@@ -363,7 +363,7 @@ impl Timeline {
|
|||||||
#[instrument(skip_all)]
|
#[instrument(skip_all)]
|
||||||
async fn imitate_layer_accesses(
|
async fn imitate_layer_accesses(
|
||||||
&self,
|
&self,
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
p: &EvictionPolicyLayerAccessThreshold,
|
p: &EvictionPolicyLayerAccessThreshold,
|
||||||
cancel: &CancellationToken,
|
cancel: &CancellationToken,
|
||||||
gate: &GateGuard,
|
gate: &GateGuard,
|
||||||
@@ -499,7 +499,7 @@ impl Timeline {
|
|||||||
#[instrument(skip_all)]
|
#[instrument(skip_all)]
|
||||||
async fn imitate_synthetic_size_calculation_worker(
|
async fn imitate_synthetic_size_calculation_worker(
|
||||||
&self,
|
&self,
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
cancel: &CancellationToken,
|
cancel: &CancellationToken,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) {
|
) {
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
|
|||||||
use crate::tenant::remote_timeline_client::ShutdownIfArchivedError;
|
use crate::tenant::remote_timeline_client::ShutdownIfArchivedError;
|
||||||
use crate::tenant::timeline::delete::{TimelineDeleteGuardKind, make_timeline_delete_guard};
|
use crate::tenant::timeline::delete::{TimelineDeleteGuardKind, make_timeline_delete_guard};
|
||||||
use crate::tenant::{
|
use crate::tenant::{
|
||||||
DeleteTimelineError, OffloadedTimeline, Tenant, TenantManifestError, TimelineOrOffloaded,
|
DeleteTimelineError, OffloadedTimeline, TenantManifestError, TenantShard, TimelineOrOffloaded,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(thiserror::Error, Debug)]
|
#[derive(thiserror::Error, Debug)]
|
||||||
@@ -33,7 +33,7 @@ impl From<TenantManifestError> for OffloadError {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) async fn offload_timeline(
|
pub(crate) async fn offload_timeline(
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
timeline: &Arc<Timeline>,
|
timeline: &Arc<Timeline>,
|
||||||
) -> Result<(), OffloadError> {
|
) -> Result<(), OffloadError> {
|
||||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||||
@@ -123,7 +123,7 @@ pub(crate) async fn offload_timeline(
|
|||||||
///
|
///
|
||||||
/// Returns the strong count of the timeline `Arc`
|
/// Returns the strong count of the timeline `Arc`
|
||||||
fn remove_timeline_from_tenant(
|
fn remove_timeline_from_tenant(
|
||||||
tenant: &Tenant,
|
tenant: &TenantShard,
|
||||||
timeline: &Timeline,
|
timeline: &Timeline,
|
||||||
_: &DeletionGuard, // using it as a witness
|
_: &DeletionGuard, // using it as a witness
|
||||||
) -> usize {
|
) -> usize {
|
||||||
|
|||||||
@@ -15,17 +15,19 @@ use super::Timeline;
|
|||||||
use crate::context::RequestContext;
|
use crate::context::RequestContext;
|
||||||
use crate::import_datadir;
|
use crate::import_datadir;
|
||||||
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
|
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
|
||||||
use crate::tenant::{CreateTimelineError, CreateTimelineIdempotency, Tenant, TimelineOrOffloaded};
|
use crate::tenant::{
|
||||||
|
CreateTimelineError, CreateTimelineIdempotency, TenantShard, TimelineOrOffloaded,
|
||||||
|
};
|
||||||
|
|
||||||
/// A timeline with some of its files on disk, being initialized.
|
/// A timeline with some of its files on disk, being initialized.
|
||||||
/// This struct ensures the atomicity of the timeline init: it's either properly created and inserted into pageserver's memory, or
|
/// This struct ensures the atomicity of the timeline init: it's either properly created and inserted into pageserver's memory, or
|
||||||
/// its local files are removed. If we crash while this class exists, then the timeline's local
|
/// its local files are removed. If we crash while this class exists, then the timeline's local
|
||||||
/// state is cleaned up during [`Tenant::clean_up_timelines`], because the timeline's content isn't in remote storage.
|
/// state is cleaned up during [`TenantShard::clean_up_timelines`], because the timeline's content isn't in remote storage.
|
||||||
///
|
///
|
||||||
/// The caller is responsible for proper timeline data filling before the final init.
|
/// The caller is responsible for proper timeline data filling before the final init.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub struct UninitializedTimeline<'t> {
|
pub struct UninitializedTimeline<'t> {
|
||||||
pub(crate) owning_tenant: &'t Tenant,
|
pub(crate) owning_tenant: &'t TenantShard,
|
||||||
timeline_id: TimelineId,
|
timeline_id: TimelineId,
|
||||||
raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard)>,
|
raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard)>,
|
||||||
/// Whether we spawned the inner Timeline's tasks such that we must later shut it down
|
/// Whether we spawned the inner Timeline's tasks such that we must later shut it down
|
||||||
@@ -35,7 +37,7 @@ pub struct UninitializedTimeline<'t> {
|
|||||||
|
|
||||||
impl<'t> UninitializedTimeline<'t> {
|
impl<'t> UninitializedTimeline<'t> {
|
||||||
pub(crate) fn new(
|
pub(crate) fn new(
|
||||||
owning_tenant: &'t Tenant,
|
owning_tenant: &'t TenantShard,
|
||||||
timeline_id: TimelineId,
|
timeline_id: TimelineId,
|
||||||
raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard)>,
|
raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard)>,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
@@ -156,7 +158,7 @@ impl<'t> UninitializedTimeline<'t> {
|
|||||||
/// Prepares timeline data by loading it from the basebackup archive.
|
/// Prepares timeline data by loading it from the basebackup archive.
|
||||||
pub(crate) async fn import_basebackup_from_tar(
|
pub(crate) async fn import_basebackup_from_tar(
|
||||||
mut self,
|
mut self,
|
||||||
tenant: Arc<Tenant>,
|
tenant: Arc<TenantShard>,
|
||||||
copyin_read: &mut (impl tokio::io::AsyncRead + Send + Sync + Unpin),
|
copyin_read: &mut (impl tokio::io::AsyncRead + Send + Sync + Unpin),
|
||||||
base_lsn: Lsn,
|
base_lsn: Lsn,
|
||||||
broker_client: storage_broker::BrokerClientChannel,
|
broker_client: storage_broker::BrokerClientChannel,
|
||||||
@@ -227,17 +229,17 @@ pub(crate) fn cleanup_timeline_directory(create_guard: TimelineCreateGuard) {
|
|||||||
error!("Failed to clean up uninitialized timeline directory {timeline_path:?}: {e:?}")
|
error!("Failed to clean up uninitialized timeline directory {timeline_path:?}: {e:?}")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Having cleaned up, we can release this TimelineId in `[Tenant::timelines_creating]` to allow other
|
// Having cleaned up, we can release this TimelineId in `[TenantShard::timelines_creating]` to allow other
|
||||||
// timeline creation attempts under this TimelineId to proceed
|
// timeline creation attempts under this TimelineId to proceed
|
||||||
drop(create_guard);
|
drop(create_guard);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A guard for timeline creations in process: as long as this object exists, the timeline ID
|
/// A guard for timeline creations in process: as long as this object exists, the timeline ID
|
||||||
/// is kept in `[Tenant::timelines_creating]` to exclude concurrent attempts to create the same timeline.
|
/// is kept in `[TenantShard::timelines_creating]` to exclude concurrent attempts to create the same timeline.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub(crate) struct TimelineCreateGuard {
|
pub(crate) struct TimelineCreateGuard {
|
||||||
pub(crate) _tenant_gate_guard: GateGuard,
|
pub(crate) _tenant_gate_guard: GateGuard,
|
||||||
pub(crate) owning_tenant: Arc<Tenant>,
|
pub(crate) owning_tenant: Arc<TenantShard>,
|
||||||
pub(crate) timeline_id: TimelineId,
|
pub(crate) timeline_id: TimelineId,
|
||||||
pub(crate) timeline_path: Utf8PathBuf,
|
pub(crate) timeline_path: Utf8PathBuf,
|
||||||
pub(crate) idempotency: CreateTimelineIdempotency,
|
pub(crate) idempotency: CreateTimelineIdempotency,
|
||||||
@@ -263,7 +265,7 @@ pub(crate) enum TimelineExclusionError {
|
|||||||
|
|
||||||
impl TimelineCreateGuard {
|
impl TimelineCreateGuard {
|
||||||
pub(crate) fn new(
|
pub(crate) fn new(
|
||||||
owning_tenant: &Arc<Tenant>,
|
owning_tenant: &Arc<TenantShard>,
|
||||||
timeline_id: TimelineId,
|
timeline_id: TimelineId,
|
||||||
timeline_path: Utf8PathBuf,
|
timeline_path: Utf8PathBuf,
|
||||||
idempotency: CreateTimelineIdempotency,
|
idempotency: CreateTimelineIdempotency,
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ use utils::lsn::Lsn;
|
|||||||
use utils::vec_map::VecMap;
|
use utils::vec_map::VecMap;
|
||||||
|
|
||||||
use crate::context::RequestContext;
|
use crate::context::RequestContext;
|
||||||
use crate::tenant::blob_io::{BYTE_UNCOMPRESSED, BYTE_ZSTD, LEN_COMPRESSION_BIT_MASK};
|
use crate::tenant::blob_io::{BYTE_UNCOMPRESSED, BYTE_ZSTD, Header};
|
||||||
use crate::virtual_file::{self, IoBufferMut, VirtualFile};
|
use crate::virtual_file::{self, IoBufferMut, VirtualFile};
|
||||||
|
|
||||||
/// Metadata bundled with the start and end offset of a blob.
|
/// Metadata bundled with the start and end offset of a blob.
|
||||||
@@ -111,18 +111,20 @@ impl From<Bytes> for BufView<'_> {
|
|||||||
pub struct VectoredBlob {
|
pub struct VectoredBlob {
|
||||||
/// Blob metadata.
|
/// Blob metadata.
|
||||||
pub meta: BlobMeta,
|
pub meta: BlobMeta,
|
||||||
/// Start offset.
|
/// Header start offset.
|
||||||
start: usize,
|
header_start: usize,
|
||||||
|
/// Data start offset.
|
||||||
|
data_start: usize,
|
||||||
/// End offset.
|
/// End offset.
|
||||||
end: usize,
|
end: usize,
|
||||||
/// Compression used on the the blob.
|
/// Compression used on the data, extracted from the header.
|
||||||
compression_bits: u8,
|
compression_bits: u8,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl VectoredBlob {
|
impl VectoredBlob {
|
||||||
/// Reads a decompressed view of the blob.
|
/// Reads a decompressed view of the blob.
|
||||||
pub(crate) async fn read<'a>(&self, buf: &BufView<'a>) -> Result<BufView<'a>, std::io::Error> {
|
pub(crate) async fn read<'a>(&self, buf: &BufView<'a>) -> Result<BufView<'a>, std::io::Error> {
|
||||||
let view = buf.view(self.start..self.end);
|
let view = buf.view(self.data_start..self.end);
|
||||||
|
|
||||||
match self.compression_bits {
|
match self.compression_bits {
|
||||||
BYTE_UNCOMPRESSED => Ok(view),
|
BYTE_UNCOMPRESSED => Ok(view),
|
||||||
@@ -140,13 +142,18 @@ impl VectoredBlob {
|
|||||||
std::io::ErrorKind::InvalidData,
|
std::io::ErrorKind::InvalidData,
|
||||||
format!(
|
format!(
|
||||||
"Failed to decompress blob for {}@{}, {}..{}: invalid compression byte {bits:x}",
|
"Failed to decompress blob for {}@{}, {}..{}: invalid compression byte {bits:x}",
|
||||||
self.meta.key, self.meta.lsn, self.start, self.end
|
self.meta.key, self.meta.lsn, self.data_start, self.end
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
Err(error)
|
Err(error)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the raw blob including header.
|
||||||
|
pub(crate) fn raw_with_header<'a>(&self, buf: &BufView<'a>) -> BufView<'a> {
|
||||||
|
buf.view(self.header_start..self.end)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Display for VectoredBlob {
|
impl std::fmt::Display for VectoredBlob {
|
||||||
@@ -154,7 +161,7 @@ impl std::fmt::Display for VectoredBlob {
|
|||||||
write!(
|
write!(
|
||||||
f,
|
f,
|
||||||
"{}@{}, {}..{}",
|
"{}@{}, {}..{}",
|
||||||
self.meta.key, self.meta.lsn, self.start, self.end
|
self.meta.key, self.meta.lsn, self.data_start, self.end
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -493,50 +500,28 @@ impl<'a> VectoredBlobReader<'a> {
|
|||||||
|
|
||||||
let blobs_at = read.blobs_at.as_slice();
|
let blobs_at = read.blobs_at.as_slice();
|
||||||
|
|
||||||
let start_offset = read.start;
|
let mut blobs = Vec::with_capacity(blobs_at.len());
|
||||||
|
|
||||||
let mut metas = Vec::with_capacity(blobs_at.len());
|
|
||||||
// Blobs in `read` only provide their starting offset. The end offset
|
// Blobs in `read` only provide their starting offset. The end offset
|
||||||
// of a blob is implicit: the start of the next blob if one exists
|
// of a blob is implicit: the start of the next blob if one exists
|
||||||
// or the end of the read.
|
// or the end of the read.
|
||||||
|
|
||||||
for (blob_start, meta) in blobs_at {
|
for (blob_start, meta) in blobs_at.iter().copied() {
|
||||||
let blob_start_in_buf = blob_start - start_offset;
|
let header_start = (blob_start - read.start) as usize;
|
||||||
let first_len_byte = buf[blob_start_in_buf as usize];
|
let header = Header::decode(&buf[header_start..])?;
|
||||||
|
let data_start = header_start + header.header_len;
|
||||||
|
let end = data_start + header.data_len;
|
||||||
|
let compression_bits = header.compression_bits;
|
||||||
|
|
||||||
// Each blob is prefixed by a header containing its size and compression information.
|
blobs.push(VectoredBlob {
|
||||||
// Extract the size and skip that header to find the start of the data.
|
header_start,
|
||||||
// The size can be 1 or 4 bytes. The most significant bit is 0 in the
|
data_start,
|
||||||
// 1 byte case and 1 in the 4 byte case.
|
|
||||||
let (size_length, blob_size, compression_bits) = if first_len_byte < 0x80 {
|
|
||||||
(1, first_len_byte as u64, BYTE_UNCOMPRESSED)
|
|
||||||
} else {
|
|
||||||
let mut blob_size_buf = [0u8; 4];
|
|
||||||
let offset_in_buf = blob_start_in_buf as usize;
|
|
||||||
|
|
||||||
blob_size_buf.copy_from_slice(&buf[offset_in_buf..offset_in_buf + 4]);
|
|
||||||
blob_size_buf[0] &= !LEN_COMPRESSION_BIT_MASK;
|
|
||||||
|
|
||||||
let compression_bits = first_len_byte & LEN_COMPRESSION_BIT_MASK;
|
|
||||||
(
|
|
||||||
4,
|
|
||||||
u32::from_be_bytes(blob_size_buf) as u64,
|
|
||||||
compression_bits,
|
|
||||||
)
|
|
||||||
};
|
|
||||||
|
|
||||||
let start = (blob_start_in_buf + size_length) as usize;
|
|
||||||
let end = start + blob_size as usize;
|
|
||||||
|
|
||||||
metas.push(VectoredBlob {
|
|
||||||
start,
|
|
||||||
end,
|
end,
|
||||||
meta: *meta,
|
meta,
|
||||||
compression_bits,
|
compression_bits,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(VectoredBlobsBuf { buf, blobs: metas })
|
Ok(VectoredBlobsBuf { buf, blobs })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -997,6 +982,15 @@ mod tests {
|
|||||||
&read_buf[..],
|
&read_buf[..],
|
||||||
"mismatch for idx={idx} at offset={offset}"
|
"mismatch for idx={idx} at offset={offset}"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Check that raw_with_header returns a valid header.
|
||||||
|
let raw = read_blob.raw_with_header(&view);
|
||||||
|
let header = Header::decode(&raw)?;
|
||||||
|
if !compression || header.header_len == 1 {
|
||||||
|
assert_eq!(header.compression_bits, BYTE_UNCOMPRESSED);
|
||||||
|
}
|
||||||
|
assert_eq!(raw.len(), header.total_len());
|
||||||
|
|
||||||
buf = result.buf;
|
buf = result.buf;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
@@ -1366,7 +1366,8 @@ pub(crate) type IoBuffer = AlignedBuffer<ConstAlign<{ get_io_buffer_alignment()
|
|||||||
pub(crate) type IoPageSlice<'a> =
|
pub(crate) type IoPageSlice<'a> =
|
||||||
AlignedSlice<'a, PAGE_SZ, ConstAlign<{ get_io_buffer_alignment() }>>;
|
AlignedSlice<'a, PAGE_SZ, ConstAlign<{ get_io_buffer_alignment() }>>;
|
||||||
|
|
||||||
static IO_MODE: AtomicU8 = AtomicU8::new(IoMode::preferred() as u8);
|
static IO_MODE: once_cell::sync::Lazy<AtomicU8> =
|
||||||
|
once_cell::sync::Lazy::new(|| AtomicU8::new(IoMode::preferred() as u8));
|
||||||
|
|
||||||
pub(crate) fn set_io_mode(mode: IoMode) {
|
pub(crate) fn set_io_mode(mode: IoMode) {
|
||||||
IO_MODE.store(mode as u8, std::sync::atomic::Ordering::Relaxed);
|
IO_MODE.store(mode as u8, std::sync::atomic::Ordering::Relaxed);
|
||||||
|
|||||||
@@ -95,7 +95,7 @@ static uint32 local_request_counter;
|
|||||||
* Various settings related to prompt (fast) handling of PageStream responses
|
* Various settings related to prompt (fast) handling of PageStream responses
|
||||||
* at any CHECK_FOR_INTERRUPTS point.
|
* at any CHECK_FOR_INTERRUPTS point.
|
||||||
*/
|
*/
|
||||||
int readahead_getpage_pull_timeout_ms = 0;
|
int readahead_getpage_pull_timeout_ms = 50;
|
||||||
static int PS_TIMEOUT_ID = 0;
|
static int PS_TIMEOUT_ID = 0;
|
||||||
static bool timeout_set = false;
|
static bool timeout_set = false;
|
||||||
static bool timeout_signaled = false;
|
static bool timeout_signaled = false;
|
||||||
|
|||||||
@@ -75,7 +75,7 @@ char *neon_auth_token;
|
|||||||
int readahead_buffer_size = 128;
|
int readahead_buffer_size = 128;
|
||||||
int flush_every_n_requests = 8;
|
int flush_every_n_requests = 8;
|
||||||
|
|
||||||
int neon_protocol_version = 2;
|
int neon_protocol_version = 3;
|
||||||
|
|
||||||
static int neon_compute_mode = 0;
|
static int neon_compute_mode = 0;
|
||||||
static int max_reconnect_attempts = 60;
|
static int max_reconnect_attempts = 60;
|
||||||
@@ -1362,7 +1362,7 @@ pg_init_libpagestore(void)
|
|||||||
"",
|
"",
|
||||||
PGC_POSTMASTER,
|
PGC_POSTMASTER,
|
||||||
0, /* no flags required */
|
0, /* no flags required */
|
||||||
check_neon_id, NULL, NULL);
|
NULL, NULL, NULL);
|
||||||
DefineCustomStringVariable("neon.branch_id",
|
DefineCustomStringVariable("neon.branch_id",
|
||||||
"Neon branch_id the server is running on",
|
"Neon branch_id the server is running on",
|
||||||
NULL,
|
NULL,
|
||||||
@@ -1370,7 +1370,7 @@ pg_init_libpagestore(void)
|
|||||||
"",
|
"",
|
||||||
PGC_POSTMASTER,
|
PGC_POSTMASTER,
|
||||||
0, /* no flags required */
|
0, /* no flags required */
|
||||||
check_neon_id, NULL, NULL);
|
NULL, NULL, NULL);
|
||||||
DefineCustomStringVariable("neon.endpoint_id",
|
DefineCustomStringVariable("neon.endpoint_id",
|
||||||
"Neon endpoint_id the server is running on",
|
"Neon endpoint_id the server is running on",
|
||||||
NULL,
|
NULL,
|
||||||
@@ -1378,7 +1378,7 @@ pg_init_libpagestore(void)
|
|||||||
"",
|
"",
|
||||||
PGC_POSTMASTER,
|
PGC_POSTMASTER,
|
||||||
0, /* no flags required */
|
0, /* no flags required */
|
||||||
check_neon_id, NULL, NULL);
|
NULL, NULL, NULL);
|
||||||
|
|
||||||
DefineCustomIntVariable("neon.stripe_size",
|
DefineCustomIntVariable("neon.stripe_size",
|
||||||
"sharding stripe size",
|
"sharding stripe size",
|
||||||
@@ -1432,7 +1432,7 @@ pg_init_libpagestore(void)
|
|||||||
"PageStream connection when we have pages which "
|
"PageStream connection when we have pages which "
|
||||||
"were read ahead but not yet received.",
|
"were read ahead but not yet received.",
|
||||||
&readahead_getpage_pull_timeout_ms,
|
&readahead_getpage_pull_timeout_ms,
|
||||||
0, 0, 5 * 60 * 1000,
|
50, 0, 5 * 60 * 1000,
|
||||||
PGC_USERSET,
|
PGC_USERSET,
|
||||||
GUC_UNIT_MS,
|
GUC_UNIT_MS,
|
||||||
NULL, NULL, NULL);
|
NULL, NULL, NULL);
|
||||||
@@ -1440,7 +1440,7 @@ pg_init_libpagestore(void)
|
|||||||
"Version of compute<->page server protocol",
|
"Version of compute<->page server protocol",
|
||||||
NULL,
|
NULL,
|
||||||
&neon_protocol_version,
|
&neon_protocol_version,
|
||||||
2, /* use protocol version 2 */
|
3, /* use protocol version 3 */
|
||||||
2, /* min */
|
2, /* min */
|
||||||
3, /* max */
|
3, /* max */
|
||||||
PGC_SU_BACKEND,
|
PGC_SU_BACKEND,
|
||||||
|
|||||||
@@ -2040,7 +2040,7 @@ neon_finish_unlogged_build_phase_1(SMgrRelation reln)
|
|||||||
/*
|
/*
|
||||||
* neon_end_unlogged_build() -- Finish an unlogged rel build.
|
* neon_end_unlogged_build() -- Finish an unlogged rel build.
|
||||||
*
|
*
|
||||||
* Call this after you have finished WAL-logging an relation that was
|
* Call this after you have finished WAL-logging a relation that was
|
||||||
* first populated without WAL-logging.
|
* first populated without WAL-logging.
|
||||||
*
|
*
|
||||||
* This removes the local copy of the rel, since it's now been fully
|
* This removes the local copy of the rel, since it's now been fully
|
||||||
@@ -2059,14 +2059,35 @@ neon_end_unlogged_build(SMgrRelation reln)
|
|||||||
|
|
||||||
if (unlogged_build_phase != UNLOGGED_BUILD_NOT_PERMANENT)
|
if (unlogged_build_phase != UNLOGGED_BUILD_NOT_PERMANENT)
|
||||||
{
|
{
|
||||||
|
XLogRecPtr recptr;
|
||||||
|
BlockNumber nblocks;
|
||||||
|
|
||||||
Assert(unlogged_build_phase == UNLOGGED_BUILD_PHASE_2);
|
Assert(unlogged_build_phase == UNLOGGED_BUILD_PHASE_2);
|
||||||
Assert(reln->smgr_relpersistence == RELPERSISTENCE_UNLOGGED);
|
Assert(reln->smgr_relpersistence == RELPERSISTENCE_UNLOGGED);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Update the last-written LSN cache.
|
||||||
|
*
|
||||||
|
* The relation is still on local disk so we can get the size by
|
||||||
|
* calling mdnblocks() directly. For the LSN, GetXLogInsertRecPtr() is
|
||||||
|
* very conservative. If we could assume that this function is called
|
||||||
|
* from the same backend that WAL-logged the contents, we could use
|
||||||
|
* XactLastRecEnd here. But better safe than sorry.
|
||||||
|
*/
|
||||||
|
nblocks = mdnblocks(reln, MAIN_FORKNUM);
|
||||||
|
recptr = GetXLogInsertRecPtr();
|
||||||
|
|
||||||
|
neon_set_lwlsn_block_range(recptr,
|
||||||
|
InfoFromNInfoB(rinfob),
|
||||||
|
MAIN_FORKNUM, 0, nblocks);
|
||||||
|
neon_set_lwlsn_relation(recptr,
|
||||||
|
InfoFromNInfoB(rinfob),
|
||||||
|
MAIN_FORKNUM);
|
||||||
|
|
||||||
/* Make the relation look permanent again */
|
/* Make the relation look permanent again */
|
||||||
reln->smgr_relpersistence = RELPERSISTENCE_PERMANENT;
|
reln->smgr_relpersistence = RELPERSISTENCE_PERMANENT;
|
||||||
|
|
||||||
/* Remove local copy */
|
/* Remove local copy */
|
||||||
rinfob = InfoBFromSMgrRel(reln);
|
|
||||||
for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
|
for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
|
||||||
{
|
{
|
||||||
neon_log(SmgrTrace, "forgetting cached relsize for %u/%u/%u.%u",
|
neon_log(SmgrTrace, "forgetting cached relsize for %u/%u/%u.%u",
|
||||||
|
|||||||
@@ -890,7 +890,7 @@ libpqwp_connect_start(char *conninfo)
|
|||||||
* palloc will exit on failure though, so there's not much we could do if
|
* palloc will exit on failure though, so there's not much we could do if
|
||||||
* it *did* fail.
|
* it *did* fail.
|
||||||
*/
|
*/
|
||||||
conn = palloc(sizeof(WalProposerConn));
|
conn = (WalProposerConn*)MemoryContextAllocZero(TopMemoryContext, sizeof(WalProposerConn));
|
||||||
conn->pg_conn = pg_conn;
|
conn->pg_conn = pg_conn;
|
||||||
conn->is_nonblocking = false; /* connections always start in blocking
|
conn->is_nonblocking = false; /* connections always start in blocking
|
||||||
* mode */
|
* mode */
|
||||||
|
|||||||
@@ -776,7 +776,6 @@ impl From<&jose_jwk::Key> for KeyType {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[expect(clippy::unwrap_used)]
|
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::future::IntoFuture;
|
use std::future::IntoFuture;
|
||||||
use std::net::SocketAddr;
|
use std::net::SocketAddr;
|
||||||
|
|||||||
@@ -253,7 +253,6 @@ fn project_name_valid(name: &str) -> bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[expect(clippy::unwrap_used)]
|
|
||||||
mod tests {
|
mod tests {
|
||||||
use ComputeUserInfoParseError::*;
|
use ComputeUserInfoParseError::*;
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
|
|||||||
@@ -258,7 +258,7 @@ async fn ssl_handshake<S: AsyncRead + AsyncWrite + Unpin>(
|
|||||||
"unexpected startup packet, rejecting connection"
|
"unexpected startup packet, rejecting connection"
|
||||||
);
|
);
|
||||||
stream
|
stream
|
||||||
.throw_error_str(ERR_INSECURE_CONNECTION, crate::error::ErrorKind::User)
|
.throw_error_str(ERR_INSECURE_CONNECTION, crate::error::ErrorKind::User, None)
|
||||||
.await?
|
.await?
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
1
proxy/src/cache/endpoints.rs
vendored
1
proxy/src/cache/endpoints.rs
vendored
@@ -259,7 +259,6 @@ impl EndpointsCache {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[expect(clippy::unwrap_used)]
|
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
|
|||||||
1
proxy/src/cache/project_info.rs
vendored
1
proxy/src/cache/project_info.rs
vendored
@@ -585,7 +585,6 @@ impl Cache for ProjectInfoCacheImpl {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[expect(clippy::unwrap_used)]
|
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::scram::ServerSecret;
|
use crate::scram::ServerSecret;
|
||||||
|
|||||||
@@ -222,7 +222,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
|||||||
{
|
{
|
||||||
Ok(auth_result) => auth_result,
|
Ok(auth_result) => auth_result,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
return stream.throw_error(e).await?;
|
return stream.throw_error(e, Some(ctx)).await?;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -238,7 +238,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
|||||||
config.wake_compute_retry_config,
|
config.wake_compute_retry_config,
|
||||||
&config.connect_to_compute,
|
&config.connect_to_compute,
|
||||||
)
|
)
|
||||||
.or_else(|e| stream.throw_error(e))
|
.or_else(|e| stream.throw_error(e, Some(ctx)))
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
let cancellation_handler_clone = Arc::clone(&cancellation_handler);
|
let cancellation_handler_clone = Arc::clone(&cancellation_handler);
|
||||||
|
|||||||
@@ -63,7 +63,7 @@ struct RequestContextInner {
|
|||||||
success: bool,
|
success: bool,
|
||||||
pub(crate) cold_start_info: ColdStartInfo,
|
pub(crate) cold_start_info: ColdStartInfo,
|
||||||
pg_options: Option<StartupMessageParams>,
|
pg_options: Option<StartupMessageParams>,
|
||||||
testodrome_query_id: Option<String>,
|
testodrome_query_id: Option<SmolStr>,
|
||||||
|
|
||||||
// extra
|
// extra
|
||||||
// This sender is here to keep the request monitoring channel open while requests are taking place.
|
// This sender is here to keep the request monitoring channel open while requests are taking place.
|
||||||
@@ -219,7 +219,7 @@ impl RequestContext {
|
|||||||
for option in options_str.split_whitespace() {
|
for option in options_str.split_whitespace() {
|
||||||
if option.starts_with("neon_query_id:") {
|
if option.starts_with("neon_query_id:") {
|
||||||
if let Some(value) = option.strip_prefix("neon_query_id:") {
|
if let Some(value) = option.strip_prefix("neon_query_id:") {
|
||||||
this.set_testodrome_id(value.to_string());
|
this.set_testodrome_id(value.into());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -272,7 +272,7 @@ impl RequestContext {
|
|||||||
.set_user_agent(user_agent);
|
.set_user_agent(user_agent);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn set_testodrome_id(&self, query_id: String) {
|
pub(crate) fn set_testodrome_id(&self, query_id: SmolStr) {
|
||||||
self.0
|
self.0
|
||||||
.try_lock()
|
.try_lock()
|
||||||
.expect("should not deadlock")
|
.expect("should not deadlock")
|
||||||
@@ -378,7 +378,7 @@ impl RequestContext {
|
|||||||
.accumulated()
|
.accumulated()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn get_testodrome_id(&self) -> Option<String> {
|
pub(crate) fn get_testodrome_id(&self) -> Option<SmolStr> {
|
||||||
self.0
|
self.0
|
||||||
.try_lock()
|
.try_lock()
|
||||||
.expect("should not deadlock")
|
.expect("should not deadlock")
|
||||||
@@ -447,7 +447,7 @@ impl RequestContextInner {
|
|||||||
self.user = Some(user);
|
self.user = Some(user);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn set_testodrome_id(&mut self, query_id: String) {
|
fn set_testodrome_id(&mut self, query_id: SmolStr) {
|
||||||
self.testodrome_query_id = Some(query_id);
|
self.testodrome_query_id = Some(query_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -416,7 +416,6 @@ async fn upload_parquet(
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[expect(clippy::unwrap_used)]
|
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::net::Ipv4Addr;
|
use std::net::Ipv4Addr;
|
||||||
use std::num::NonZeroUsize;
|
use std::num::NonZeroUsize;
|
||||||
|
|||||||
@@ -227,7 +227,6 @@ impl From<AccountId> for AccountIdInt {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[expect(clippy::unwrap_used)]
|
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::sync::OnceLock;
|
use std::sync::OnceLock;
|
||||||
|
|
||||||
|
|||||||
@@ -1032,7 +1032,6 @@ impl<const F: usize> serde::ser::Serialize for ExtractedSpanFields<'_, F> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[allow(clippy::unwrap_used)]
|
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
use std::sync::{Arc, Mutex, MutexGuard};
|
use std::sync::{Arc, Mutex, MutexGuard};
|
||||||
|
|||||||
@@ -400,7 +400,6 @@ impl NetworkEndianIpv6 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[expect(clippy::unwrap_used)]
|
|
||||||
mod tests {
|
mod tests {
|
||||||
use tokio::io::AsyncReadExt;
|
use tokio::io::AsyncReadExt;
|
||||||
|
|
||||||
|
|||||||
@@ -262,7 +262,6 @@ impl CopyBuffer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[expect(clippy::unwrap_used)]
|
|
||||||
mod tests {
|
mod tests {
|
||||||
use tokio::io::AsyncWriteExt;
|
use tokio::io::AsyncWriteExt;
|
||||||
|
|
||||||
|
|||||||
@@ -196,7 +196,11 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
|
|||||||
// OR we didn't provide it at all (for dev purposes).
|
// OR we didn't provide it at all (for dev purposes).
|
||||||
if tls.is_some() {
|
if tls.is_some() {
|
||||||
return stream
|
return stream
|
||||||
.throw_error_str(ERR_INSECURE_CONNECTION, crate::error::ErrorKind::User)
|
.throw_error_str(
|
||||||
|
ERR_INSECURE_CONNECTION,
|
||||||
|
crate::error::ErrorKind::User,
|
||||||
|
None,
|
||||||
|
)
|
||||||
.await?;
|
.await?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -329,7 +329,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
|||||||
|
|
||||||
let user_info = match result {
|
let user_info = match result {
|
||||||
Ok(user_info) => user_info,
|
Ok(user_info) => user_info,
|
||||||
Err(e) => stream.throw_error(e).await?,
|
Err(e) => stream.throw_error(e, Some(ctx)).await?,
|
||||||
};
|
};
|
||||||
|
|
||||||
let user = user_info.get_user().to_owned();
|
let user = user_info.get_user().to_owned();
|
||||||
@@ -349,7 +349,10 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
|||||||
let app = params.get("application_name");
|
let app = params.get("application_name");
|
||||||
let params_span = tracing::info_span!("", ?user, ?db, ?app);
|
let params_span = tracing::info_span!("", ?user, ?db, ?app);
|
||||||
|
|
||||||
return stream.throw_error(e).instrument(params_span).await?;
|
return stream
|
||||||
|
.throw_error(e, Some(ctx))
|
||||||
|
.instrument(params_span)
|
||||||
|
.await?;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -374,7 +377,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
|
|||||||
config.wake_compute_retry_config,
|
config.wake_compute_retry_config,
|
||||||
&config.connect_to_compute,
|
&config.connect_to_compute,
|
||||||
)
|
)
|
||||||
.or_else(|e| stream.throw_error(e))
|
.or_else(|e| stream.throw_error(e, Some(ctx)))
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
let cancellation_handler_clone = Arc::clone(&cancellation_handler);
|
let cancellation_handler_clone = Arc::clone(&cancellation_handler);
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
//! A group of high-level tests for connection establishing logic and auth.
|
//! A group of high-level tests for connection establishing logic and auth.
|
||||||
#![allow(clippy::unimplemented, clippy::unwrap_used)]
|
#![allow(clippy::unimplemented)]
|
||||||
|
|
||||||
mod mitm;
|
mod mitm;
|
||||||
|
|
||||||
|
|||||||
@@ -83,7 +83,7 @@ impl From<LeakyBucketConfig> for utils::leaky_bucket::LeakyBucketConfig {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[allow(clippy::float_cmp, clippy::unwrap_used)]
|
#[allow(clippy::float_cmp)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
|
|||||||
@@ -63,7 +63,6 @@ impl LimitAlgorithm for Aimd {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[expect(clippy::unwrap_used)]
|
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
|
|||||||
@@ -259,7 +259,6 @@ impl<K: Hash + Eq, R: Rng, S: BuildHasher + Clone> BucketRateLimiter<K, R, S> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[expect(clippy::unwrap_used)]
|
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::hash::BuildHasherDefault;
|
use std::hash::BuildHasherDefault;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|||||||
@@ -51,7 +51,6 @@ impl<'a> ServerMessage<&'a str> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[expect(clippy::unwrap_used)]
|
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user