mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-14 19:50:38 +00:00
Compare commits
5 Commits
skyzh/work
...
jcsp/shard
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
652479cf62 | ||
|
|
b82f037ab1 | ||
|
|
fa96b758bc | ||
|
|
ecfe76865b | ||
|
|
2fa492943a |
10
.github/workflows/_build-and-test-locally.yml
vendored
10
.github/workflows/_build-and-test-locally.yml
vendored
@@ -229,13 +229,8 @@ jobs:
|
||||
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E '!package(pageserver)'
|
||||
|
||||
# run pageserver tests with different settings
|
||||
for get_vectored_concurrent_io in sequential sidecar-task; do
|
||||
for io_engine in std-fs tokio-epoll-uring ; do
|
||||
NEON_PAGESERVER_UNIT_TEST_GET_VECTORED_CONCURRENT_IO=$get_vectored_concurrent_io \
|
||||
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine \
|
||||
${cov_prefix} \
|
||||
cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
|
||||
done
|
||||
for io_engine in std-fs tokio-epoll-uring ; do
|
||||
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
|
||||
done
|
||||
|
||||
# Run separate tests for real S3
|
||||
@@ -319,7 +314,6 @@ jobs:
|
||||
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
|
||||
BUILD_TAG: ${{ inputs.build-tag }}
|
||||
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
|
||||
PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
|
||||
USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}
|
||||
|
||||
# Temporary disable this step until we figure out why it's so flaky
|
||||
|
||||
17
.github/workflows/build_and_test.yml
vendored
17
.github/workflows/build_and_test.yml
vendored
@@ -820,11 +820,11 @@ jobs:
|
||||
- name: Print logs and clean up
|
||||
if: always()
|
||||
run: |
|
||||
docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml logs || true
|
||||
docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml down
|
||||
docker compose -f ./docker-compose/docker-compose.yml logs || 0
|
||||
docker compose -f ./docker-compose/docker-compose.yml down
|
||||
|
||||
promote-images-dev:
|
||||
needs: [ check-permissions, tag, vm-compute-node-image, neon-image ]
|
||||
needs: [ check-permissions, tag, vm-compute-node-image ]
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
permissions:
|
||||
@@ -859,7 +859,7 @@ jobs:
|
||||
done
|
||||
|
||||
promote-images-prod:
|
||||
needs: [ check-permissions, tag, test-images, promote-images-dev ]
|
||||
needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
|
||||
runs-on: ubuntu-22.04
|
||||
if: github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
|
||||
|
||||
@@ -1078,6 +1078,12 @@ jobs:
|
||||
console.log(`Tag ${tag} created successfully.`);
|
||||
}
|
||||
|
||||
// TODO: check how GitHub releases looks for proxy/compute releases and enable them if they're ok
|
||||
if (context.ref !== 'refs/heads/release') {
|
||||
console.log(`GitHub release skipped for ${context.ref}.`);
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const existingRelease = await github.rest.repos.getReleaseByTag({
|
||||
owner: context.repo.owner,
|
||||
@@ -1096,8 +1102,7 @@ jobs:
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
tag_name: tag,
|
||||
// TODO: Automate release notes properly
|
||||
generate_release_notes: false,
|
||||
generate_release_notes: true,
|
||||
});
|
||||
console.log(`Release for tag ${tag} created successfully.`);
|
||||
}
|
||||
|
||||
5
.github/workflows/release.yml
vendored
5
.github/workflows/release.yml
vendored
@@ -3,9 +3,8 @@ name: Create Release Branch
|
||||
on:
|
||||
schedule:
|
||||
# It should be kept in sync with if-condition in jobs
|
||||
- cron: '0 6 * * THU' # Proxy release
|
||||
- cron: '0 6 * * FRI' # Storage release
|
||||
- cron: '0 7 * * FRI' # Compute release
|
||||
- cron: '0 6 * * THU' # Proxy release
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
create-storage-release-branch:
|
||||
@@ -56,7 +55,7 @@ jobs:
|
||||
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
|
||||
create-compute-release-branch:
|
||||
if: ${{ github.event.schedule == '0 7 * * FRI' || inputs.create-compute-release-branch }}
|
||||
if: inputs.create-compute-release-branch
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
172
Cargo.lock
generated
172
Cargo.lock
generated
@@ -942,18 +942,6 @@ version = "1.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
|
||||
|
||||
[[package]]
|
||||
name = "bb8"
|
||||
version = "0.8.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d89aabfae550a5c44b43ab941844ffcd2e993cb6900b342debf59e9ea74acdb8"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"futures-util",
|
||||
"parking_lot 0.12.1",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bcder"
|
||||
version = "0.7.4"
|
||||
@@ -1313,7 +1301,7 @@ dependencies = [
|
||||
"tar",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
"tower 0.5.2",
|
||||
@@ -1324,7 +1312,6 @@ dependencies = [
|
||||
"tracing-utils",
|
||||
"url",
|
||||
"utils",
|
||||
"uuid",
|
||||
"vm_monitor",
|
||||
"workspace_hack",
|
||||
"zstd",
|
||||
@@ -1422,7 +1409,7 @@ dependencies = [
|
||||
"storage_broker",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres",
|
||||
"tokio-util",
|
||||
"toml",
|
||||
"toml_edit",
|
||||
@@ -1798,24 +1785,11 @@ dependencies = [
|
||||
"chrono",
|
||||
"diesel_derives",
|
||||
"itoa",
|
||||
"pq-sys",
|
||||
"r2d2",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "diesel-async"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "51a307ac00f7c23f526a04a77761a0519b9f0eb2838ebf5b905a58580095bdcb"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bb8",
|
||||
"diesel",
|
||||
"futures-util",
|
||||
"scoped-futures",
|
||||
"tokio",
|
||||
"tokio-postgres 0.7.12",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "diesel_derives"
|
||||
version = "2.2.1"
|
||||
@@ -4067,8 +4041,8 @@ dependencies = [
|
||||
"pageserver_compaction",
|
||||
"pin-project-lite",
|
||||
"postgres",
|
||||
"postgres-protocol 0.6.4",
|
||||
"postgres-types 0.2.4",
|
||||
"postgres-protocol",
|
||||
"postgres-types",
|
||||
"postgres_backend",
|
||||
"postgres_connection",
|
||||
"postgres_ffi",
|
||||
@@ -4099,7 +4073,7 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-epoll-uring",
|
||||
"tokio-io-timeout",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres",
|
||||
"tokio-stream",
|
||||
"tokio-tar",
|
||||
"tokio-util",
|
||||
@@ -4157,7 +4131,7 @@ dependencies = [
|
||||
"serde",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
"utils",
|
||||
@@ -4463,7 +4437,7 @@ dependencies = [
|
||||
"futures-util",
|
||||
"log",
|
||||
"tokio",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4484,24 +4458,6 @@ dependencies = [
|
||||
"stringprep",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "postgres-protocol"
|
||||
version = "0.6.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "acda0ebdebc28befa84bee35e651e4c5f09073d668c7aed4cf7e23c3cda84b23"
|
||||
dependencies = [
|
||||
"base64 0.22.1",
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
"hmac",
|
||||
"md-5",
|
||||
"memchr",
|
||||
"rand 0.8.5",
|
||||
"sha2",
|
||||
"stringprep",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "postgres-protocol2"
|
||||
version = "0.1.0"
|
||||
@@ -4525,18 +4481,7 @@ source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#511f
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
"postgres-protocol 0.6.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "postgres-types"
|
||||
version = "0.2.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f66ea23a2d0e5734297357705193335e0a957696f34bed2f2faefacb2fec336f"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
"postgres-protocol 0.6.7",
|
||||
"postgres-protocol",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4561,7 +4506,7 @@ dependencies = [
|
||||
"serde",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres",
|
||||
"tokio-postgres-rustls",
|
||||
"tokio-rustls 0.26.0",
|
||||
"tokio-util",
|
||||
@@ -4576,7 +4521,7 @@ dependencies = [
|
||||
"itertools 0.10.5",
|
||||
"once_cell",
|
||||
"postgres",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres",
|
||||
"url",
|
||||
]
|
||||
|
||||
@@ -4663,6 +4608,15 @@ version = "0.2.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
|
||||
|
||||
[[package]]
|
||||
name = "pq-sys"
|
||||
version = "0.6.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6cc05d7ea95200187117196eee9edd0644424911821aeb28a18ce60ea0b8793"
|
||||
dependencies = [
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pq_proto"
|
||||
version = "0.1.0"
|
||||
@@ -4670,7 +4624,7 @@ dependencies = [
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"itertools 0.10.5",
|
||||
"postgres-protocol 0.6.4",
|
||||
"postgres-protocol",
|
||||
"rand 0.8.5",
|
||||
"serde",
|
||||
"thiserror",
|
||||
@@ -4918,7 +4872,7 @@ dependencies = [
|
||||
"tikv-jemalloc-ctl",
|
||||
"tikv-jemallocator",
|
||||
"tokio",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres",
|
||||
"tokio-postgres2",
|
||||
"tokio-rustls 0.26.0",
|
||||
"tokio-tungstenite 0.21.0",
|
||||
@@ -4975,6 +4929,17 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "r2d2"
|
||||
version = "0.8.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "51de85fb3fb6524929c8a2eb85e6b6d363de4e8c48f9e2c2eac4944abc181c93"
|
||||
dependencies = [
|
||||
"log",
|
||||
"parking_lot 0.12.1",
|
||||
"scheduled-thread-pool",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.7.3"
|
||||
@@ -5706,7 +5671,7 @@ dependencies = [
|
||||
"pageserver_api",
|
||||
"parking_lot 0.12.1",
|
||||
"postgres",
|
||||
"postgres-protocol 0.6.4",
|
||||
"postgres-protocol",
|
||||
"postgres_backend",
|
||||
"postgres_ffi",
|
||||
"pprof",
|
||||
@@ -5730,7 +5695,7 @@ dependencies = [
|
||||
"tikv-jemallocator",
|
||||
"tokio",
|
||||
"tokio-io-timeout",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres",
|
||||
"tokio-stream",
|
||||
"tokio-tar",
|
||||
"tokio-util",
|
||||
@@ -5789,12 +5754,12 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scoped-futures"
|
||||
version = "0.1.4"
|
||||
name = "scheduled-thread-pool"
|
||||
version = "0.2.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1b24aae2d0636530f359e9d5ef0c04669d11c5e756699b27a6a6d845d8329091"
|
||||
checksum = "3cbc66816425a074528352f5789333ecff06ca41b36b0b0efdfbb29edc391a19"
|
||||
dependencies = [
|
||||
"pin-project-lite",
|
||||
"parking_lot 0.12.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6329,7 +6294,6 @@ dependencies = [
|
||||
"clap",
|
||||
"control_plane",
|
||||
"diesel",
|
||||
"diesel-async",
|
||||
"diesel_migrations",
|
||||
"fail",
|
||||
"futures",
|
||||
@@ -6344,10 +6308,10 @@ dependencies = [
|
||||
"pageserver_api",
|
||||
"pageserver_client",
|
||||
"postgres_connection",
|
||||
"r2d2",
|
||||
"rand 0.8.5",
|
||||
"reqwest",
|
||||
"routerify",
|
||||
"scoped-futures",
|
||||
"scopeguard",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -6400,7 +6364,7 @@ dependencies = [
|
||||
"serde_json",
|
||||
"storage_controller_client",
|
||||
"tokio",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres",
|
||||
"tokio-postgres-rustls",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
@@ -6809,7 +6773,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "tokio-epoll-uring"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#781989bb540a1408b0b93daa1e9d1fa452195497"
|
||||
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#33e00106a268644d02ba0461bbd64476073b0ee1"
|
||||
dependencies = [
|
||||
"futures",
|
||||
"nix 0.26.4",
|
||||
@@ -6859,39 +6823,13 @@ dependencies = [
|
||||
"percent-encoding",
|
||||
"phf",
|
||||
"pin-project-lite",
|
||||
"postgres-protocol 0.6.4",
|
||||
"postgres-types 0.2.4",
|
||||
"postgres-protocol",
|
||||
"postgres-types",
|
||||
"socket2",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-postgres"
|
||||
version = "0.7.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3b5d3742945bc7d7f210693b0c58ae542c6fd47b17adbbda0885f3dcb34a6bdb"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
"futures-channel",
|
||||
"futures-util",
|
||||
"log",
|
||||
"parking_lot 0.12.1",
|
||||
"percent-encoding",
|
||||
"phf",
|
||||
"pin-project-lite",
|
||||
"postgres-protocol 0.6.7",
|
||||
"postgres-types 0.2.8",
|
||||
"rand 0.8.5",
|
||||
"socket2",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"whoami",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-postgres-rustls"
|
||||
version = "0.12.0"
|
||||
@@ -6901,7 +6839,7 @@ dependencies = [
|
||||
"ring",
|
||||
"rustls 0.23.18",
|
||||
"tokio",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres",
|
||||
"tokio-rustls 0.26.0",
|
||||
"x509-certificate",
|
||||
]
|
||||
@@ -7065,9 +7003,12 @@ version = "0.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
"axum",
|
||||
"base64 0.22.1",
|
||||
"bytes",
|
||||
"h2 0.4.4",
|
||||
"http 1.1.0",
|
||||
"http-body 1.0.0",
|
||||
"http-body-util",
|
||||
@@ -7079,6 +7020,7 @@ dependencies = [
|
||||
"prost",
|
||||
"rustls-native-certs 0.8.0",
|
||||
"rustls-pemfile 2.1.1",
|
||||
"socket2",
|
||||
"tokio",
|
||||
"tokio-rustls 0.26.0",
|
||||
"tokio-stream",
|
||||
@@ -7430,7 +7372,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "uring-common"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#781989bb540a1408b0b93daa1e9d1fa452195497"
|
||||
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#33e00106a268644d02ba0461bbd64476073b0ee1"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"io-uring",
|
||||
@@ -7559,6 +7501,12 @@ version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
|
||||
|
||||
[[package]]
|
||||
name = "vcpkg"
|
||||
version = "0.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.4"
|
||||
@@ -7578,7 +7526,7 @@ dependencies = [
|
||||
"serde_json",
|
||||
"sysinfo",
|
||||
"tokio",
|
||||
"tokio-postgres 0.7.7",
|
||||
"tokio-postgres",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
@@ -7633,6 +7581,7 @@ dependencies = [
|
||||
"tikv-jemallocator",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tonic",
|
||||
"tonic-build",
|
||||
"tracing",
|
||||
"utils",
|
||||
@@ -8041,6 +7990,8 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"anyhow",
|
||||
"axum",
|
||||
"axum-core",
|
||||
"base64 0.13.1",
|
||||
"base64 0.21.1",
|
||||
"base64ct",
|
||||
@@ -8121,6 +8072,7 @@ dependencies = [
|
||||
"toml_edit",
|
||||
"tonic",
|
||||
"tower 0.4.13",
|
||||
"tower 0.5.2",
|
||||
"tracing",
|
||||
"tracing-core",
|
||||
"url",
|
||||
|
||||
@@ -187,7 +187,7 @@ tokio-tar = "0.3"
|
||||
tokio-util = { version = "0.7.10", features = ["io", "rt"] }
|
||||
toml = "0.8"
|
||||
toml_edit = "0.22"
|
||||
tonic = {version = "0.12.3", default-features = false, features = ["channel", "tls", "tls-roots"]}
|
||||
tonic = {version = "0.12.3", features = ["tls", "tls-roots"]}
|
||||
tower = { version = "0.5.2", default-features = false }
|
||||
tower-http = { version = "0.6.2", features = ["request-id", "trace"] }
|
||||
tower-service = "0.3.3"
|
||||
|
||||
2
Makefile
2
Makefile
@@ -64,6 +64,8 @@ CARGO_BUILD_FLAGS += $(filter -j1,$(MAKEFLAGS))
|
||||
CARGO_CMD_PREFIX += $(if $(filter n,$(MAKEFLAGS)),,+)
|
||||
# Force cargo not to print progress bar
|
||||
CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1
|
||||
# Set PQ_LIB_DIR to make sure `storage_controller` get linked with bundled libpq (through diesel)
|
||||
CARGO_CMD_PREFIX += PQ_LIB_DIR=$(POSTGRES_INSTALL_DIR)/v16/lib
|
||||
|
||||
CACHEDIR_TAG_CONTENTS := "Signature: 8a477f597d28d172789f06886806bc55"
|
||||
|
||||
|
||||
@@ -67,9 +67,6 @@ RUN cd postgres && \
|
||||
# Enable some of contrib extensions
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/autoinc.control && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/dblink.control && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgres_fdw.control && \
|
||||
file=/usr/local/pgsql/share/extension/postgres_fdw--1.0.sql && [ -e $file ] && \
|
||||
echo 'GRANT USAGE ON FOREIGN DATA WRAPPER postgres_fdw TO neon_superuser;' >> $file && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/bloom.control && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/earthdistance.control && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/insert_username.control && \
|
||||
@@ -363,8 +360,6 @@ COPY compute/patches/pgvector.patch /pgvector.patch
|
||||
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.8.0.tar.gz -O pgvector.tar.gz && \
|
||||
echo "867a2c328d4928a5a9d6f052cd3bc78c7d60228a9b914ad32aa3db88e9de27b0 pgvector.tar.gz" | sha256sum --check && \
|
||||
mkdir pgvector-src && cd pgvector-src && tar xzf ../pgvector.tar.gz --strip-components=1 -C . && \
|
||||
wget https://github.com/pgvector/pgvector/raw/refs/tags/v0.7.4/sql/vector.sql -O ./sql/vector--0.7.4.sql && \
|
||||
echo "10218d05dc02299562252a9484775178b14a1d8edb92a2d1672ef488530f7778 ./sql/vector--0.7.4.sql" | sha256sum --check && \
|
||||
patch -p1 < /pgvector.patch && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS="" && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS="" install && \
|
||||
@@ -1000,50 +995,24 @@ RUN wget https://github.com/kelvich/pg_tiktoken/archive/9118dd4549b7d8c0bbc98e04
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg-pgx-ulid-build"
|
||||
# Compile "pgx_ulid" extension for v16 and below
|
||||
# Compile "pgx_ulid" extension
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM rust-extensions-build AS pg-pgx-ulid-build
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v14" | "v15" | "v16") \
|
||||
;; \
|
||||
*) \
|
||||
echo "skipping the version of pgx_ulid for $PG_VERSION" && exit 0 \
|
||||
;; \
|
||||
# doesn't support v17 yet
|
||||
# https://github.com/pksunkara/pgx_ulid/pull/52
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "pgx_ulid does not support pg17 as of the latest version (0.1.5)" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.5.tar.gz -O pgx_ulid.tar.gz && \
|
||||
echo "9d1659a2da65af0133d5451c454de31b37364e3502087dadf579f790bc8bef17 pgx_ulid.tar.gz" | sha256sum --check && \
|
||||
echo "9d1659a2da65af0133d5451c454de31b37364e3502087dadf579f790bc8bef17 pgx_ulid.tar.gz" | sha256sum --check && \
|
||||
mkdir pgx_ulid-src && cd pgx_ulid-src && tar xzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
|
||||
sed -i 's/pgrx = "^0.11.2"/pgrx = { version = "0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
sed -i 's/pgrx = "^0.11.2"/pgrx = { version = "=0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
cargo pgrx install --release && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/ulid.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg-pgx-ulid-pgrx12-build"
|
||||
# Compile "pgx_ulid" extension for v17 and up
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM rust-extensions-build-pgrx12 AS pg-pgx-ulid-pgrx12-build
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v17") \
|
||||
;; \
|
||||
*) \
|
||||
echo "skipping the version of pgx_ulid for $PG_VERSION" && exit 0 \
|
||||
;; \
|
||||
esac && \
|
||||
wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.2.0.tar.gz -O pgx_ulid.tar.gz && \
|
||||
echo "cef6a9a2e5e7bd1a10a18989286586ee9e6c1c06005a4055cff190de41bf3e9f pgx_ulid.tar.gz" | sha256sum --check && \
|
||||
mkdir pgx_ulid-src && cd pgx_ulid-src && tar xzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
|
||||
sed -i 's/pgrx = "^0.12.7"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
cargo pgrx install --release && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgx_ulid.control
|
||||
echo "trusted = true" >> /usr/local/pgsql/share/extension/ulid.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
@@ -1188,7 +1157,6 @@ COPY --from=timescaledb-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-hint-plan-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-cron-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-pgx-ulid-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-pgx-ulid-pgrx12-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-session-jwt-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=rdkit-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-uuidv7-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
@@ -1266,12 +1234,11 @@ RUN set -e \
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layers "postgres-exporter", "pgbouncer-exporter", and "sql-exporter"
|
||||
# Layers "postgres-exporter" and "sql-exporter"
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM quay.io/prometheuscommunity/postgres-exporter:v0.16.0 AS postgres-exporter
|
||||
FROM quay.io/prometheuscommunity/pgbouncer-exporter:v0.10.2 AS pgbouncer-exporter
|
||||
|
||||
# Keep the version the same as in build-tools.Dockerfile and
|
||||
# test_runner/regress/test_compute_metrics.py.
|
||||
@@ -1353,6 +1320,9 @@ COPY --from=pg-roaringbitmap-pg-build /pg_roaringbitmap.tar.gz /ext-src
|
||||
COPY --from=pg-semver-pg-build /pg_semver.tar.gz /ext-src
|
||||
#COPY --from=pg-embedding-pg-build /home/nonroot/pg_embedding-src/ /ext-src
|
||||
#COPY --from=wal2json-pg-build /wal2json_2_5.tar.gz /ext-src
|
||||
#pg_anon is not supported yet for pg v17 so, don't fail if nothing found
|
||||
COPY --from=pg-anon-pg-build /pg_anon.tar.g? /ext-src
|
||||
COPY compute/patches/pg_anon.patch /ext-src
|
||||
COPY --from=pg-ivm-build /pg_ivm.tar.gz /ext-src
|
||||
COPY --from=pg-partman-build /pg_partman.tar.gz /ext-src
|
||||
RUN cd /ext-src/ && for f in *.tar.gz; \
|
||||
@@ -1363,6 +1333,9 @@ RUN cd /ext-src/rum-src && patch -p1 <../rum.patch
|
||||
RUN cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch
|
||||
RUN cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan_${PG_VERSION}.patch
|
||||
COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "postgresql_anonymizer does not yet support PG17" && exit 0;; \
|
||||
esac && patch -p1 </ext-src/pg_anon.patch
|
||||
RUN patch -p1 </ext-src/pg_cron.patch
|
||||
ENV PATH=/usr/local/pgsql/bin:$PATH
|
||||
ENV PGHOST=compute
|
||||
@@ -1403,7 +1376,6 @@ RUN mkdir -p /etc/local_proxy && chown postgres:postgres /etc/local_proxy
|
||||
|
||||
# Metrics exporter binaries and configuration files
|
||||
COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter
|
||||
COPY --from=pgbouncer-exporter /bin/pgbouncer_exporter /bin/pgbouncer_exporter
|
||||
COPY --from=sql-exporter /bin/sql_exporter /bin/sql_exporter
|
||||
|
||||
COPY --chown=postgres compute/etc/postgres_exporter.yml /etc/postgres_exporter.yml
|
||||
|
||||
@@ -19,8 +19,6 @@ max_prepared_statements=0
|
||||
admin_users=postgres
|
||||
unix_socket_dir=/tmp/
|
||||
unix_socket_mode=0777
|
||||
; required for pgbouncer_exporter
|
||||
ignore_startup_parameters=extra_float_digits
|
||||
|
||||
;; Disable connection logging. It produces a lot of logs that no one looks at,
|
||||
;; and we can get similar log entries from the proxy too. We had incidents in
|
||||
|
||||
@@ -1,24 +1,8 @@
|
||||
diff --git a/Makefile b/Makefile
|
||||
index 7a4b88c..56678af 100644
|
||||
--- a/Makefile
|
||||
+++ b/Makefile
|
||||
@@ -3,7 +3,10 @@ EXTVERSION = 0.8.0
|
||||
|
||||
MODULE_big = vector
|
||||
DATA = $(wildcard sql/*--*--*.sql)
|
||||
-DATA_built = sql/$(EXTENSION)--$(EXTVERSION).sql
|
||||
+# This change is needed to install different per-version SQL files
|
||||
+# like pgvector--0.8.0.sql and pgvector--0.7.4.sql
|
||||
+# The corresponding file is downloaded during the Docker image build process
|
||||
+DATA_built = sql/$(EXTENSION)--$(EXTVERSION).sql sql/vector--0.7.4.sql
|
||||
OBJS = src/bitutils.o src/bitvec.o src/halfutils.o src/halfvec.o src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/sparsevec.o src/vector.o
|
||||
HEADERS = src/halfvec.h src/sparsevec.h src/vector.h
|
||||
|
||||
diff --git a/src/hnswbuild.c b/src/hnswbuild.c
|
||||
index b667478..fc1897c 100644
|
||||
index dcfb2bd..d5189ee 100644
|
||||
--- a/src/hnswbuild.c
|
||||
+++ b/src/hnswbuild.c
|
||||
@@ -843,9 +843,17 @@ HnswParallelBuildMain(dsm_segment *seg, shm_toc *toc)
|
||||
@@ -860,9 +860,17 @@ HnswParallelBuildMain(dsm_segment *seg, shm_toc *toc)
|
||||
|
||||
hnswarea = shm_toc_lookup(toc, PARALLEL_KEY_HNSW_AREA, false);
|
||||
|
||||
@@ -36,7 +20,7 @@ index b667478..fc1897c 100644
|
||||
/* Close relations within worker */
|
||||
index_close(indexRel, indexLockmode);
|
||||
table_close(heapRel, heapLockmode);
|
||||
@@ -1100,12 +1108,38 @@ BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo,
|
||||
@@ -1117,12 +1125,38 @@ BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo,
|
||||
SeedRandom(42);
|
||||
#endif
|
||||
|
||||
|
||||
@@ -27,10 +27,6 @@ commands:
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter --config.file=/etc/postgres_exporter.yml'
|
||||
- name: pgbouncer-exporter
|
||||
user: postgres
|
||||
sysvInitAction: respawn
|
||||
shell: '/bin/pgbouncer_exporter --pgBouncer.connectionString="postgres:///pgbouncer?host=/tmp&port=6432&dbname=pgbouncer&user=pgbouncer"'
|
||||
- name: sql-exporter
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
|
||||
@@ -27,10 +27,6 @@ commands:
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter --config.file=/etc/postgres_exporter.yml'
|
||||
- name: pgbouncer-exporter
|
||||
user: postgres
|
||||
sysvInitAction: respawn
|
||||
shell: '/bin/pgbouncer_exporter --pgBouncer.connectionString="postgres:///pgbouncer?host=/tmp&port=6432&dbname=pgbouncer&user=pgbouncer"'
|
||||
- name: sql-exporter
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
|
||||
@@ -51,7 +51,6 @@ tracing-subscriber.workspace = true
|
||||
tracing-utils.workspace = true
|
||||
thiserror.workspace = true
|
||||
url.workspace = true
|
||||
uuid.workspace = true
|
||||
prometheus.workspace = true
|
||||
|
||||
postgres_initdb.workspace = true
|
||||
|
||||
@@ -58,8 +58,6 @@ struct Args {
|
||||
pg_bin_dir: Utf8PathBuf,
|
||||
#[clap(long)]
|
||||
pg_lib_dir: Utf8PathBuf,
|
||||
#[clap(long)]
|
||||
pg_port: Option<u16>, // port to run postgres on, 5432 is default
|
||||
}
|
||||
|
||||
#[serde_with::serde_as]
|
||||
@@ -76,13 +74,6 @@ enum EncryptionSecret {
|
||||
KMS { key_id: String },
|
||||
}
|
||||
|
||||
// copied from pageserver_api::config::defaults::DEFAULT_LOCALE to avoid dependency just for a constant
|
||||
const DEFAULT_LOCALE: &str = if cfg!(target_os = "macos") {
|
||||
"C"
|
||||
} else {
|
||||
"C.UTF-8"
|
||||
};
|
||||
|
||||
#[tokio::main]
|
||||
pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
utils::logging::init(
|
||||
@@ -106,10 +97,6 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
let working_directory = args.working_directory;
|
||||
let pg_bin_dir = args.pg_bin_dir;
|
||||
let pg_lib_dir = args.pg_lib_dir;
|
||||
let pg_port = args.pg_port.unwrap_or_else(|| {
|
||||
info!("pg_port not specified, using default 5432");
|
||||
5432
|
||||
});
|
||||
|
||||
// Initialize AWS clients only if s3_prefix is specified
|
||||
let (aws_config, kms_client) = if args.s3_prefix.is_some() {
|
||||
@@ -193,7 +180,7 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
let superuser = "cloud_admin"; // XXX: this shouldn't be hard-coded
|
||||
postgres_initdb::do_run_initdb(postgres_initdb::RunInitdbArgs {
|
||||
superuser,
|
||||
locale: DEFAULT_LOCALE, // XXX: this shouldn't be hard-coded,
|
||||
locale: "en_US.UTF-8", // XXX: this shouldn't be hard-coded,
|
||||
pg_version,
|
||||
initdb_bin: pg_bin_dir.join("initdb").as_ref(),
|
||||
library_search_path: &pg_lib_dir, // TODO: is this right? Prob works in compute image, not sure about neon_local.
|
||||
@@ -210,7 +197,6 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
let mut postgres_proc = tokio::process::Command::new(pgbin)
|
||||
.arg("-D")
|
||||
.arg(&pgdata_dir)
|
||||
.args(["-p", &format!("{pg_port}")])
|
||||
.args(["-c", "wal_level=minimal"])
|
||||
.args(["-c", "shared_buffers=10GB"])
|
||||
.args(["-c", "max_wal_senders=0"])
|
||||
@@ -230,7 +216,6 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
),
|
||||
])
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", &pg_lib_dir)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()
|
||||
@@ -247,7 +232,7 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
|
||||
// Create neondb database in the running postgres
|
||||
let restore_pg_connstring =
|
||||
format!("host=localhost port={pg_port} user={superuser} dbname=postgres");
|
||||
format!("host=localhost port=5432 user={superuser} dbname=postgres");
|
||||
|
||||
let start_time = std::time::Instant::now();
|
||||
|
||||
@@ -329,7 +314,6 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
.arg(&source_connection_string)
|
||||
// how we run it
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", &pg_lib_dir)
|
||||
.kill_on_drop(true)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
@@ -363,7 +347,6 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
.arg(&dumpdir)
|
||||
// how we run it
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", &pg_lib_dir)
|
||||
.kill_on_drop(true)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
|
||||
@@ -41,14 +41,14 @@ use crate::local_proxy;
|
||||
use crate::pg_helpers::*;
|
||||
use crate::spec::*;
|
||||
use crate::spec_apply::ApplySpecPhase::{
|
||||
CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSchemaNeon,
|
||||
CreateSuperUser, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions,
|
||||
HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles,
|
||||
RunInEachDatabase,
|
||||
CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSuperUser,
|
||||
DropInvalidDatabases, DropRoles, HandleNeonExtension, HandleOtherExtensions,
|
||||
RenameAndDeleteDatabases, RenameRoles, RunInEachDatabase,
|
||||
};
|
||||
use crate::spec_apply::PerDatabasePhase;
|
||||
use crate::spec_apply::PerDatabasePhase::{
|
||||
ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions, HandleAnonExtension,
|
||||
ChangeSchemaPerms, DeleteDBRoleReferences, DropSubscriptionsForDeletedDatabases,
|
||||
HandleAnonExtension,
|
||||
};
|
||||
use crate::spec_apply::{apply_operations, MutableApplyContext, DB};
|
||||
use crate::sync_sk::{check_if_synced, ping_safekeeper};
|
||||
@@ -340,15 +340,6 @@ impl ComputeNode {
|
||||
self.state.lock().unwrap().status
|
||||
}
|
||||
|
||||
pub fn get_timeline_id(&self) -> Option<TimelineId> {
|
||||
self.state
|
||||
.lock()
|
||||
.unwrap()
|
||||
.pspec
|
||||
.as_ref()
|
||||
.map(|s| s.timeline_id)
|
||||
}
|
||||
|
||||
// Remove `pgdata` directory and create it again with right permissions.
|
||||
fn create_pgdata(&self) -> Result<()> {
|
||||
// Ignore removal error, likely it is a 'No such file or directory (os error 2)'.
|
||||
@@ -938,48 +929,6 @@ impl ComputeNode {
|
||||
.map(|role| (role.name.clone(), role))
|
||||
.collect::<HashMap<String, Role>>();
|
||||
|
||||
// Check if we need to drop subscriptions before starting the endpoint.
|
||||
//
|
||||
// It is important to do this operation exactly once when endpoint starts on a new branch.
|
||||
// Otherwise, we may drop not inherited, but newly created subscriptions.
|
||||
//
|
||||
// We cannot rely only on spec.drop_subscriptions_before_start flag,
|
||||
// because if for some reason compute restarts inside VM,
|
||||
// it will start again with the same spec and flag value.
|
||||
//
|
||||
// To handle this, we save the fact of the operation in the database
|
||||
// in the neon.drop_subscriptions_done table.
|
||||
// If the table does not exist, we assume that the operation was never performed, so we must do it.
|
||||
// If table exists, we check if the operation was performed on the current timelilne.
|
||||
//
|
||||
let mut drop_subscriptions_done = false;
|
||||
|
||||
if spec.drop_subscriptions_before_start {
|
||||
let timeline_id = self.get_timeline_id().context("timeline_id must be set")?;
|
||||
let query = format!("select 1 from neon.drop_subscriptions_done where timeline_id = '{}'", timeline_id);
|
||||
|
||||
info!("Checking if drop subscription operation was already performed for timeline_id: {}", timeline_id);
|
||||
|
||||
drop_subscriptions_done = match
|
||||
client.simple_query(&query).await {
|
||||
Ok(result) => {
|
||||
matches!(&result[0], postgres::SimpleQueryMessage::Row(_))
|
||||
},
|
||||
Err(e) =>
|
||||
{
|
||||
match e.code() {
|
||||
Some(&SqlState::UNDEFINED_TABLE) => false,
|
||||
_ => {
|
||||
// We don't expect any other error here, except for the schema/table not existing
|
||||
error!("Error checking if drop subscription operation was already performed: {}", e);
|
||||
return Err(e.into());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
let jwks_roles = Arc::new(
|
||||
spec.as_ref()
|
||||
.local_proxy_config
|
||||
@@ -1047,7 +996,7 @@ impl ComputeNode {
|
||||
jwks_roles.clone(),
|
||||
concurrency_token.clone(),
|
||||
db,
|
||||
[DropLogicalSubscriptions].to_vec(),
|
||||
[DropSubscriptionsForDeletedDatabases].to_vec(),
|
||||
);
|
||||
|
||||
Ok(spawn(fut))
|
||||
@@ -1075,7 +1024,6 @@ impl ComputeNode {
|
||||
CreateAndAlterRoles,
|
||||
RenameAndDeleteDatabases,
|
||||
CreateAndAlterDatabases,
|
||||
CreateSchemaNeon,
|
||||
] {
|
||||
info!("Applying phase {:?}", &phase);
|
||||
apply_operations(
|
||||
@@ -1116,17 +1064,6 @@ impl ComputeNode {
|
||||
}
|
||||
|
||||
let conf = Arc::new(conf);
|
||||
let mut phases = vec![
|
||||
DeleteDBRoleReferences,
|
||||
ChangeSchemaPerms,
|
||||
HandleAnonExtension,
|
||||
];
|
||||
|
||||
if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
|
||||
info!("Adding DropLogicalSubscriptions phase because drop_subscriptions_before_start is set");
|
||||
phases.push(DropLogicalSubscriptions);
|
||||
}
|
||||
|
||||
let fut = Self::apply_spec_sql_db(
|
||||
spec.clone(),
|
||||
conf,
|
||||
@@ -1134,7 +1071,12 @@ impl ComputeNode {
|
||||
jwks_roles.clone(),
|
||||
concurrency_token.clone(),
|
||||
db,
|
||||
phases,
|
||||
[
|
||||
DeleteDBRoleReferences,
|
||||
ChangeSchemaPerms,
|
||||
HandleAnonExtension,
|
||||
]
|
||||
.to_vec(),
|
||||
);
|
||||
|
||||
Ok(spawn(fut))
|
||||
@@ -1146,20 +1088,12 @@ impl ComputeNode {
|
||||
handle.await??;
|
||||
}
|
||||
|
||||
let mut phases = vec![
|
||||
for phase in vec![
|
||||
HandleOtherExtensions,
|
||||
HandleNeonExtension, // This step depends on CreateSchemaNeon
|
||||
HandleNeonExtension,
|
||||
CreateAvailabilityCheck,
|
||||
DropRoles,
|
||||
];
|
||||
|
||||
// This step depends on CreateSchemaNeon
|
||||
if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
|
||||
info!("Adding FinalizeDropLogicalSubscriptions phase because drop_subscriptions_before_start is set");
|
||||
phases.push(FinalizeDropLogicalSubscriptions);
|
||||
}
|
||||
|
||||
for phase in phases {
|
||||
] {
|
||||
debug!("Applying phase {:?}", &phase);
|
||||
apply_operations(
|
||||
spec.clone(),
|
||||
@@ -1529,14 +1463,6 @@ impl ComputeNode {
|
||||
Ok(())
|
||||
},
|
||||
)?;
|
||||
|
||||
let postgresql_conf_path = pgdata_path.join("postgresql.conf");
|
||||
if config::line_in_file(
|
||||
&postgresql_conf_path,
|
||||
"neon.disable_logical_replication_subscribers=false",
|
||||
)? {
|
||||
info!("updated postgresql.conf to set neon.disable_logical_replication_subscribers=false");
|
||||
}
|
||||
self.pg_reload_conf()?;
|
||||
}
|
||||
self.post_apply_config()?;
|
||||
|
||||
@@ -129,13 +129,6 @@ pub fn write_postgres_conf(
|
||||
|
||||
writeln!(file, "neon.extension_server_port={}", extension_server_port)?;
|
||||
|
||||
if spec.drop_subscriptions_before_start {
|
||||
writeln!(file, "neon.disable_logical_replication_subscribers=true")?;
|
||||
} else {
|
||||
// be explicit about the default value
|
||||
writeln!(file, "neon.disable_logical_replication_subscribers=false")?;
|
||||
}
|
||||
|
||||
// This is essential to keep this line at the end of the file,
|
||||
// because it is intended to override any settings above.
|
||||
writeln!(file, "include_if_exists = 'compute_ctl_temp_override.conf'")?;
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
use std::{
|
||||
net::{IpAddr, Ipv6Addr, SocketAddr},
|
||||
sync::Arc,
|
||||
sync::{
|
||||
atomic::{AtomicU64, Ordering},
|
||||
Arc,
|
||||
},
|
||||
thread,
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use anyhow::Result;
|
||||
use axum::{
|
||||
extract::Request,
|
||||
middleware::{self, Next},
|
||||
response::{IntoResponse, Response},
|
||||
routing::{get, post},
|
||||
Router,
|
||||
@@ -16,9 +17,11 @@ use axum::{
|
||||
use http::StatusCode;
|
||||
use tokio::net::TcpListener;
|
||||
use tower::ServiceBuilder;
|
||||
use tower_http::{request_id::PropagateRequestIdLayer, trace::TraceLayer};
|
||||
use tower_http::{
|
||||
request_id::{MakeRequestId, PropagateRequestIdLayer, RequestId, SetRequestIdLayer},
|
||||
trace::TraceLayer,
|
||||
};
|
||||
use tracing::{debug, error, info, Span};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::routes::{
|
||||
check_writability, configure, database_schema, dbs_and_roles, extension_server, extensions,
|
||||
@@ -31,24 +34,30 @@ async fn handle_404() -> Response {
|
||||
StatusCode::NOT_FOUND.into_response()
|
||||
}
|
||||
|
||||
const X_REQUEST_ID: &str = "x-request-id";
|
||||
#[derive(Clone, Default)]
|
||||
struct ComputeMakeRequestId(Arc<AtomicU64>);
|
||||
|
||||
/// This middleware function allows compute_ctl to generate its own request ID
|
||||
/// if one isn't supplied. The control plane will always send one as a UUID. The
|
||||
/// neon Postgres extension on the other hand does not send one.
|
||||
async fn maybe_add_request_id_header(mut request: Request, next: Next) -> Response {
|
||||
let headers = request.headers_mut();
|
||||
impl MakeRequestId for ComputeMakeRequestId {
|
||||
fn make_request_id<B>(
|
||||
&mut self,
|
||||
_request: &http::Request<B>,
|
||||
) -> Option<tower_http::request_id::RequestId> {
|
||||
let request_id = self
|
||||
.0
|
||||
.fetch_add(1, Ordering::SeqCst)
|
||||
.to_string()
|
||||
.parse()
|
||||
.unwrap();
|
||||
|
||||
if headers.get(X_REQUEST_ID).is_none() {
|
||||
headers.append(X_REQUEST_ID, Uuid::new_v4().to_string().parse().unwrap());
|
||||
Some(RequestId::new(request_id))
|
||||
}
|
||||
|
||||
next.run(request).await
|
||||
}
|
||||
|
||||
/// Run the HTTP server and wait on it forever.
|
||||
#[tokio::main]
|
||||
async fn serve(port: u16, compute: Arc<ComputeNode>) {
|
||||
const X_REQUEST_ID: &str = "x-request-id";
|
||||
|
||||
let mut app = Router::new()
|
||||
.route("/check_writability", post(check_writability::is_writable))
|
||||
.route("/configure", post(configure::configure))
|
||||
@@ -73,8 +82,9 @@ async fn serve(port: u16, compute: Arc<ComputeNode>) {
|
||||
.fallback(handle_404)
|
||||
.layer(
|
||||
ServiceBuilder::new()
|
||||
// Add this middleware since we assume the request ID exists
|
||||
.layer(middleware::from_fn(maybe_add_request_id_header))
|
||||
.layer(SetRequestIdLayer::x_request_id(
|
||||
ComputeMakeRequestId::default(),
|
||||
))
|
||||
.layer(
|
||||
TraceLayer::new_for_http()
|
||||
.on_request(|request: &http::Request<_>, _span: &Span| {
|
||||
|
||||
@@ -47,7 +47,7 @@ pub enum PerDatabasePhase {
|
||||
DeleteDBRoleReferences,
|
||||
ChangeSchemaPerms,
|
||||
HandleAnonExtension,
|
||||
DropLogicalSubscriptions,
|
||||
DropSubscriptionsForDeletedDatabases,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
@@ -58,13 +58,11 @@ pub enum ApplySpecPhase {
|
||||
CreateAndAlterRoles,
|
||||
RenameAndDeleteDatabases,
|
||||
CreateAndAlterDatabases,
|
||||
CreateSchemaNeon,
|
||||
RunInEachDatabase { db: DB, subphase: PerDatabasePhase },
|
||||
HandleOtherExtensions,
|
||||
HandleNeonExtension,
|
||||
CreateAvailabilityCheck,
|
||||
DropRoles,
|
||||
FinalizeDropLogicalSubscriptions,
|
||||
}
|
||||
|
||||
pub struct Operation {
|
||||
@@ -333,7 +331,7 @@ async fn get_operations<'a>(
|
||||
// NB: there could be other db states, which prevent us from dropping
|
||||
// the database. For example, if db is used by any active subscription
|
||||
// or replication slot.
|
||||
// Such cases are handled in the DropLogicalSubscriptions
|
||||
// Such cases are handled in the DropSubscriptionsForDeletedDatabases
|
||||
// phase. We do all the cleanup before actually dropping the database.
|
||||
let drop_db_query: String = format!(
|
||||
"DROP DATABASE IF EXISTS {} WITH (FORCE)",
|
||||
@@ -444,19 +442,13 @@ async fn get_operations<'a>(
|
||||
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
ApplySpecPhase::CreateSchemaNeon => Ok(Box::new(once(Operation {
|
||||
query: String::from("CREATE SCHEMA IF NOT EXISTS neon"),
|
||||
comment: Some(String::from(
|
||||
"create schema for neon extension and utils tables",
|
||||
)),
|
||||
}))),
|
||||
ApplySpecPhase::RunInEachDatabase { db, subphase } => {
|
||||
match subphase {
|
||||
PerDatabasePhase::DropLogicalSubscriptions => {
|
||||
PerDatabasePhase::DropSubscriptionsForDeletedDatabases => {
|
||||
match &db {
|
||||
DB::UserDB(db) => {
|
||||
let drop_subscription_query: String = format!(
|
||||
include_str!("sql/drop_subscriptions.sql"),
|
||||
include_str!("sql/drop_subscription_for_drop_dbs.sql"),
|
||||
datname_str = escape_literal(&db.name),
|
||||
);
|
||||
|
||||
@@ -674,6 +666,10 @@ async fn get_operations<'a>(
|
||||
}
|
||||
ApplySpecPhase::HandleNeonExtension => {
|
||||
let operations = vec![
|
||||
Operation {
|
||||
query: String::from("CREATE SCHEMA IF NOT EXISTS neon"),
|
||||
comment: Some(String::from("init: add schema for extension")),
|
||||
},
|
||||
Operation {
|
||||
query: String::from("CREATE EXTENSION IF NOT EXISTS neon WITH SCHEMA neon"),
|
||||
comment: Some(String::from(
|
||||
@@ -716,9 +712,5 @@ async fn get_operations<'a>(
|
||||
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
ApplySpecPhase::FinalizeDropLogicalSubscriptions => Ok(Box::new(once(Operation {
|
||||
query: String::from(include_str!("sql/finalize_drop_subscriptions.sql")),
|
||||
comment: None,
|
||||
}))),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS(
|
||||
SELECT 1
|
||||
FROM pg_catalog.pg_tables
|
||||
WHERE tablename = 'drop_subscriptions_done'
|
||||
AND schemaname = 'neon'
|
||||
)
|
||||
THEN
|
||||
CREATE TABLE neon.drop_subscriptions_done
|
||||
(id serial primary key, timeline_id text);
|
||||
END IF;
|
||||
|
||||
-- preserve the timeline_id of the last drop_subscriptions run
|
||||
-- to ensure that the cleanup of a timeline is executed only once.
|
||||
-- use upsert to avoid the table bloat in case of cascade branching (branch of a branch)
|
||||
INSERT INTO neon.drop_subscriptions_done VALUES (1, current_setting('neon.timeline_id'))
|
||||
ON CONFLICT (id) DO UPDATE
|
||||
SET timeline_id = current_setting('neon.timeline_id');
|
||||
END
|
||||
$$
|
||||
@@ -1357,7 +1357,6 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
args.pg_version,
|
||||
mode,
|
||||
!args.update_catalog,
|
||||
false,
|
||||
)?;
|
||||
}
|
||||
EndpointCmd::Start(args) => {
|
||||
|
||||
@@ -76,7 +76,6 @@ pub struct EndpointConf {
|
||||
http_port: u16,
|
||||
pg_version: u32,
|
||||
skip_pg_catalog_updates: bool,
|
||||
drop_subscriptions_before_start: bool,
|
||||
features: Vec<ComputeFeature>,
|
||||
}
|
||||
|
||||
@@ -144,7 +143,6 @@ impl ComputeControlPlane {
|
||||
pg_version: u32,
|
||||
mode: ComputeMode,
|
||||
skip_pg_catalog_updates: bool,
|
||||
drop_subscriptions_before_start: bool,
|
||||
) -> Result<Arc<Endpoint>> {
|
||||
let pg_port = pg_port.unwrap_or_else(|| self.get_port());
|
||||
let http_port = http_port.unwrap_or_else(|| self.get_port() + 1);
|
||||
@@ -164,7 +162,6 @@ impl ComputeControlPlane {
|
||||
// with this we basically test a case of waking up an idle compute, where
|
||||
// we also skip catalog updates in the cloud.
|
||||
skip_pg_catalog_updates,
|
||||
drop_subscriptions_before_start,
|
||||
features: vec![],
|
||||
});
|
||||
|
||||
@@ -180,7 +177,6 @@ impl ComputeControlPlane {
|
||||
pg_port,
|
||||
pg_version,
|
||||
skip_pg_catalog_updates,
|
||||
drop_subscriptions_before_start,
|
||||
features: vec![],
|
||||
})?,
|
||||
)?;
|
||||
@@ -244,7 +240,6 @@ pub struct Endpoint {
|
||||
// Optimizations
|
||||
skip_pg_catalog_updates: bool,
|
||||
|
||||
drop_subscriptions_before_start: bool,
|
||||
// Feature flags
|
||||
features: Vec<ComputeFeature>,
|
||||
}
|
||||
@@ -296,7 +291,6 @@ impl Endpoint {
|
||||
tenant_id: conf.tenant_id,
|
||||
pg_version: conf.pg_version,
|
||||
skip_pg_catalog_updates: conf.skip_pg_catalog_updates,
|
||||
drop_subscriptions_before_start: conf.drop_subscriptions_before_start,
|
||||
features: conf.features,
|
||||
})
|
||||
}
|
||||
@@ -631,7 +625,6 @@ impl Endpoint {
|
||||
shard_stripe_size: Some(shard_stripe_size),
|
||||
local_proxy_config: None,
|
||||
reconfigure_concurrency: 1,
|
||||
drop_subscriptions_before_start: self.drop_subscriptions_before_start,
|
||||
};
|
||||
let spec_path = self.endpoint_path().join("spec.json");
|
||||
std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;
|
||||
|
||||
@@ -352,16 +352,6 @@ impl PageServerNode {
|
||||
.map(serde_json::from_str)
|
||||
.transpose()
|
||||
.context("Failed to parse 'compaction_algorithm' json")?,
|
||||
l0_flush_delay_threshold: settings
|
||||
.remove("l0_flush_delay_threshold")
|
||||
.map(|x| x.parse::<usize>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'l0_flush_delay_threshold' as an integer")?,
|
||||
l0_flush_stall_threshold: settings
|
||||
.remove("l0_flush_stall_threshold")
|
||||
.map(|x| x.parse::<usize>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'l0_flush_stall_threshold' as an integer")?,
|
||||
gc_horizon: settings
|
||||
.remove("gc_horizon")
|
||||
.map(|x| x.parse::<u64>())
|
||||
@@ -428,26 +418,6 @@ impl PageServerNode {
|
||||
.map(serde_json::from_str)
|
||||
.transpose()
|
||||
.context("parse `wal_receiver_protocol_override` from json")?,
|
||||
rel_size_v2_enabled: settings
|
||||
.remove("rel_size_v2_enabled")
|
||||
.map(|x| x.parse::<bool>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'rel_size_v2_enabled' as bool")?,
|
||||
gc_compaction_enabled: settings
|
||||
.remove("gc_compaction_enabled")
|
||||
.map(|x| x.parse::<bool>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'gc_compaction_enabled' as bool")?,
|
||||
gc_compaction_initial_threshold_kb: settings
|
||||
.remove("gc_compaction_initial_threshold_kb")
|
||||
.map(|x| x.parse::<u64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'gc_compaction_initial_threshold_kb' as integer")?,
|
||||
gc_compaction_ratio_percent: settings
|
||||
.remove("gc_compaction_ratio_percent")
|
||||
.map(|x| x.parse::<u64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'gc_compaction_ratio_percent' as integer")?,
|
||||
};
|
||||
if !settings.is_empty() {
|
||||
bail!("Unrecognized tenant settings: {settings:?}")
|
||||
|
||||
@@ -298,7 +298,14 @@ impl FromStr for SkSchedulingPolicyArg {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
SkSchedulingPolicy::from_str(s).map(Self)
|
||||
match s {
|
||||
"active" => Ok(Self(SkSchedulingPolicy::Active)),
|
||||
"disabled" => Ok(Self(SkSchedulingPolicy::Disabled)),
|
||||
"decomissioned" => Ok(Self(SkSchedulingPolicy::Decomissioned)),
|
||||
_ => Err(anyhow::anyhow!(
|
||||
"Unknown scheduling policy '{s}', try active,disabled,decomissioned"
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -10,7 +10,10 @@ USER root
|
||||
RUN apt-get update && \
|
||||
apt-get install -y curl \
|
||||
jq \
|
||||
python3-pip \
|
||||
netcat-openbsd
|
||||
#Faker is required for the pg_anon test
|
||||
RUN case $COMPUTE_IMAGE in compute-node-v17) OPT="--break-system-packages";; *) OPT= ;; esac && pip3 install $OPT Faker
|
||||
#This is required for the pg_hintplan test
|
||||
RUN mkdir -p /ext-src/pg_hint_plan-src && chown postgres /ext-src/pg_hint_plan-src
|
||||
|
||||
|
||||
@@ -150,8 +150,8 @@ services:
|
||||
- REPOSITORY=${REPOSITORY:-neondatabase}
|
||||
- COMPUTE_IMAGE=compute-node-v${PG_VERSION:-16}
|
||||
- TAG=${TAG:-latest}
|
||||
- http_proxy=${http_proxy:-}
|
||||
- https_proxy=${https_proxy:-}
|
||||
- http_proxy=$http_proxy
|
||||
- https_proxy=$https_proxy
|
||||
environment:
|
||||
- PG_VERSION=${PG_VERSION:-16}
|
||||
#- RUST_BACKTRACE=1
|
||||
@@ -185,8 +185,6 @@ services:
|
||||
neon-test-extensions:
|
||||
profiles: ["test-extensions"]
|
||||
image: ${REPOSITORY:-neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-16}:${TAG:-latest}
|
||||
environment:
|
||||
- PGPASSWORD=cloud_admin
|
||||
entrypoint:
|
||||
- "/bin/bash"
|
||||
- "-c"
|
||||
|
||||
@@ -18,10 +18,14 @@ cd $(dirname $0)
|
||||
COMPUTE_CONTAINER_NAME=docker-compose-compute-1
|
||||
TEST_CONTAINER_NAME=docker-compose-neon-test-extensions-1
|
||||
PSQL_OPTION="-h localhost -U cloud_admin -p 55433 -d postgres"
|
||||
: ${http_proxy:=}
|
||||
: ${https_proxy:=}
|
||||
export http_proxy https_proxy
|
||||
|
||||
cleanup() {
|
||||
echo "show container information"
|
||||
docker ps
|
||||
docker compose --profile test-extensions -f $COMPOSE_FILE logs
|
||||
echo "stop containers..."
|
||||
docker compose --profile test-extensions -f $COMPOSE_FILE down
|
||||
}
|
||||
@@ -31,6 +35,12 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
|
||||
echo "clean up containers if exists"
|
||||
cleanup
|
||||
PG_TEST_VERSION=$((pg_version < 16 ? 16 : pg_version))
|
||||
# The support of pg_anon not yet added to PG17, so we have to add the corresponding option for other PG versions
|
||||
if [ "${pg_version}" -ne 17 ]; then
|
||||
SPEC_PATH="compute_wrapper/var/db/postgres/specs"
|
||||
mv $SPEC_PATH/spec.json $SPEC_PATH/spec.bak
|
||||
jq '.cluster.settings += [{"name": "session_preload_libraries","value": "anon","vartype": "string"}]' "${SPEC_PATH}/spec.bak" > "${SPEC_PATH}/spec.json"
|
||||
fi
|
||||
PG_VERSION=$pg_version PG_TEST_VERSION=$PG_TEST_VERSION docker compose --profile test-extensions -f $COMPOSE_FILE up --build -d
|
||||
|
||||
echo "wait until the compute is ready. timeout after 60s. "
|
||||
@@ -40,6 +50,7 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
|
||||
cnt=`expr $cnt + 3`
|
||||
if [ $cnt -gt 60 ]; then
|
||||
echo "timeout before the compute is ready."
|
||||
cleanup
|
||||
exit 1
|
||||
fi
|
||||
if docker compose --profile test-extensions -f $COMPOSE_FILE logs "compute_is_ready" | grep -q "accepting connections"; then
|
||||
@@ -51,19 +62,36 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
|
||||
done
|
||||
|
||||
if [ $pg_version -ge 16 ]; then
|
||||
echo Enabling trust connection
|
||||
docker exec $COMPUTE_CONTAINER_NAME bash -c "sed -i '\$d' /var/db/postgres/compute/pg_hba.conf && echo -e 'host\t all\t all\t all\t trust' >> /var/db/postgres/compute/pg_hba.conf && psql $PSQL_OPTION -c 'select pg_reload_conf()' "
|
||||
echo Adding postgres role
|
||||
docker exec $COMPUTE_CONTAINER_NAME psql $PSQL_OPTION -c "CREATE ROLE postgres SUPERUSER LOGIN"
|
||||
# This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail
|
||||
# It cannot be moved to Dockerfile now because the database directory is created after the start of the container
|
||||
echo Adding dummy config
|
||||
docker exec $COMPUTE_CONTAINER_NAME touch /var/db/postgres/compute/compute_ctl_temp_override.conf
|
||||
# The following block copies the files for the pg_hintplan test to the compute node for the extension test in an isolated docker-compose environment
|
||||
# This block is required for the pg_anon extension test.
|
||||
# The test assumes that it is running on the same host with the postgres engine.
|
||||
# In our case it's not true, that's why we are copying files to the compute node
|
||||
TMPDIR=$(mktemp -d)
|
||||
# Add support for pg_anon for pg_v16
|
||||
if [ $pg_version -ne 17 ]; then
|
||||
docker cp $TEST_CONTAINER_NAME:/ext-src/pg_anon-src/data $TMPDIR/data
|
||||
echo -e '1\t too \t many \t tabs' > $TMPDIR/data/bad.csv
|
||||
docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/tmp/tmp_anon_alternate_data
|
||||
rm -rf $TMPDIR
|
||||
fi
|
||||
TMPDIR=$(mktemp -d)
|
||||
# The following block does the same for the pg_hintplan test
|
||||
docker cp $TEST_CONTAINER_NAME:/ext-src/pg_hint_plan-src/data $TMPDIR/data
|
||||
docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/ext-src/pg_hint_plan-src/
|
||||
rm -rf $TMPDIR
|
||||
# We are running tests now
|
||||
if ! docker exec -e SKIP=timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pgtap-src,pg_tiktoken-src,pg_jsonschema-src,pg_graphql-src,kq_imcx-src,wal2json_2_5-src \
|
||||
if docker exec -e SKIP=timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pgtap-src,pg_tiktoken-src,pg_jsonschema-src,pg_graphql-src,kq_imcx-src,wal2json_2_5-src \
|
||||
$TEST_CONTAINER_NAME /run-tests.sh | tee testout.txt
|
||||
then
|
||||
cleanup
|
||||
else
|
||||
FAILED=$(tail -1 testout.txt)
|
||||
for d in $FAILED
|
||||
do
|
||||
@@ -73,7 +101,13 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
|
||||
cat $d/regression.out $d/regression.diffs || true
|
||||
done
|
||||
rm -rf $FAILED
|
||||
cleanup
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
cleanup
|
||||
# Restore the original spec.json
|
||||
if [ "$pg_version" -ne 17 ]; then
|
||||
mv "$SPEC_PATH/spec.bak" "$SPEC_PATH/spec.json"
|
||||
fi
|
||||
done
|
||||
|
||||
@@ -7,10 +7,7 @@ LIST=$( (echo -e "${SKIP//","/"\n"}"; ls -d -- *-src) | sort | uniq -u)
|
||||
for d in ${LIST}
|
||||
do
|
||||
[ -d "${d}" ] || continue
|
||||
if ! psql -w -c "select 1" >/dev/null; then
|
||||
FAILED="${d} ${FAILED}"
|
||||
break
|
||||
fi
|
||||
psql -c "select 1" >/dev/null || break
|
||||
USE_PGXS=1 make -C "${d}" installcheck || FAILED="${d} ${FAILED}"
|
||||
done
|
||||
[ -z "${FAILED}" ] && exit 0
|
||||
|
||||
@@ -1,255 +0,0 @@
|
||||
#
|
||||
Created on Aug 2024
|
||||
Implemented on Jan 2025
|
||||
|
||||
## Summary
|
||||
|
||||
Data in large tenants is split up between multiple pageservers according to key hashes, as
|
||||
introduced in the [sharding RFC](031-sharding-static.md) and [shard splitting RFC](032-shard-splitting.md).
|
||||
|
||||
Whereas currently we send all WAL to all pageserver shards, and each shard filters out the data that it needs,
|
||||
in this RFC we add a mechanism to filter the WAL on the safekeeper, so that each shard receives
|
||||
only the data it needs.
|
||||
|
||||
This will place some extra CPU load on the safekeepers, in exchange for reducing the network bandwidth
|
||||
for ingesting WAL back to scaling as O(1) with shard count, rather than O(N_shards).
|
||||
|
||||
## Motivation
|
||||
|
||||
1. Large databases require higher shard counts. Whereas currently we run with up to 8 shards for tenants
|
||||
with a few TB of storage, the next order of magnitude capacity increase will require tens of shards, such
|
||||
that sending all WAL to all shards is impractical in terms of bandwidth.
|
||||
2. For contemporary database sizes (~2TB), the pageserver is the bottleneck for ingest: since each
|
||||
shard has to decode and process the whole WAL, sharding doesn't fully relieve this bottleneck. To achieve significantly higher ingest speeds, we need to filter the WAL earlier so that each pageserver
|
||||
only has to process relevant parts.
|
||||
|
||||
## Non Goals (if relevant)
|
||||
|
||||
We do not seek to introduce multiple WALs per timeline, or to share the work of handling a timeline's
|
||||
WAL across safekeepers (beyond simple 3x replication). This RFC may be thought of as an incremental
|
||||
move of the ingestion bottleneck up the stack: instead of high write rates bottlenecking on the
|
||||
pageserver, they will bottleneck on the safekeeper.
|
||||
|
||||
## Impacted components (e.g. pageserver, safekeeper, console, etc)
|
||||
|
||||
Safekeeper, pageserver.
|
||||
|
||||
There will be no control plane or storage controller coordination needed, as pageservers will directly
|
||||
indicate their sharding parameters to the safekeeper when subscribing for WAL.
|
||||
|
||||
## Proposed implementation
|
||||
|
||||
Terminology:
|
||||
- "Data pages" refers to postgres relation blocks, and SLRU blocks.
|
||||
- "Metadata pages" refers to everything else the pageserver stores, such as relation sizes and
|
||||
directories of relations.
|
||||
|
||||
### Phase 1: Refactor ingest
|
||||
|
||||
Currently, pageserver ingest code is structured approximately as follows:
|
||||
1. `handle_walreceiver_connection` reads a stream of binary WAL records off a network
|
||||
socket
|
||||
2. `WalIngest::ingest_record` to translate the record into a series of page-level modifications
|
||||
3. `DatadirModification` accumulates page updates from several `ingest_record` calls, and when
|
||||
its `commit()` method is called, flushes these into a Timeline's open `InMemoryLayer`.
|
||||
|
||||
This process currently assumes access to a pageserver `Timeline` throughout `ingest_record` and
|
||||
from `DatadirModification`, which is used to do read-modify-write cycles on metadata pages
|
||||
such as relation sizes and the master DBDIR page. It also assumes that records are ingested
|
||||
strictly one after the other: they cannot be ingested in parallel because each record assumes
|
||||
that earlier records' changes have already been applied to `Timeline`.
|
||||
|
||||
This code will be refactored to disentangle the simple, fast decode of relation page writes
|
||||
from the more complex logic for updating internal metadata. An intermediate representation
|
||||
called `InterpretedWalRecords` will be introduced. This is similar to the internal state of
|
||||
a `DatadirModification`, but does not require access to a Timeline. Instead of storing
|
||||
metadata updates as materialized writes to pages, it will accumulate these as abstract operations,
|
||||
for example rather than including a write to a relation size key, this structure will include
|
||||
an operation that indicates "Update relation _foo_'s size to the max of its current value and
|
||||
_bar_", such that these may be applied later to a real Timeline.
|
||||
|
||||
The `DatadirModification` will be aware of the `EphemeralFile` format, so that as it accumulates
|
||||
simple page writes of relation blocks, it can write them directly into a buffer in the serialized
|
||||
format. This will avoid the need to later deserialize/reserialize this data when passing the
|
||||
structure between safekeeper and pageserver.
|
||||
|
||||
The new pipeline will be:
|
||||
1. `handle_walreceiver_connection` reads a stream of binary WAL records off a network
|
||||
2. A `InterpretedWalRecords` is generated from the incoming WAL records. This does not
|
||||
require a reference to a Timeline.
|
||||
3. The logic that is current spread between `WalIngest` and `DatadirModification` for updating
|
||||
metadata will be refactored to consume the metadata operations from the `InterpretedWalRecords`
|
||||
and turn them into literal writes to metadata pages. This part must be done sequentially.
|
||||
4. The resulting buffer of metadata page writes is combined with the buffer of relation block
|
||||
writes, and written into the `InMemoryLayer`.
|
||||
|
||||
Implemented in:
|
||||
1. https://github.com/neondatabase/neon/pull/9472
|
||||
2. https://github.com/neondatabase/neon/pull/9504
|
||||
3. https://github.com/neondatabase/neon/pull/9524
|
||||
|
||||
### Phase 2: Decode & filter on safekeeper
|
||||
|
||||
In the previous phase, the ingest code was modified to be able to do most of its work without access to
|
||||
a Timeline: this first stage of ingest simply converts a series of binary wal records into
|
||||
a buffer of relation/SLRU page writes, and a buffer of abstract metadata writes.
|
||||
|
||||
The modified ingest code may be transplanted from pageserver to safekeeper (probably via a
|
||||
shared crate). The safekeeper->pageserver network protocol is modified to:
|
||||
- in subscription requests, send the `ShardIdentity` from the pageserver to the safekeeper
|
||||
- in responses, transmit a `InterpretedWalRecords` instead of a raw `WalRecord`.
|
||||
- use the `ShardIdentity` to filter the `ProcessedWalIngest` to relevant content for
|
||||
the subscribing shard before transmitting it.
|
||||
|
||||
The overall behavior of the pageserver->safekeeper interaction remains the same, in terms of
|
||||
consistent LSN feedback, and connection management. Only the payload of the subscriptions
|
||||
changes, to express an LSN range of WAL as a filtered `ProcessedWalIngest` instead of the
|
||||
raw data.
|
||||
|
||||
The ingest code on the pageserver can now skip the part where it does the first phase of
|
||||
processing, as it will receive pre-processed, compressed data off the wire.
|
||||
|
||||
Note that `InterpretedWalRecord` batches multiple `InterpretedWalRecord(s)` in the same network
|
||||
message. Safekeeper reads WAL in chunks of 16 blocks and then decodes as many Postgres WAL records
|
||||
as possible. Each Postgres WAL record maps to one `InterpretedWalRecord` for potentially multiple shards.
|
||||
Hence, the size of the batch is given by the number of Postgres WAL records that fit in 16 blocks.
|
||||
|
||||
The protocol needs to support evolution. Protobuf was chosen here with the view that, in the future,
|
||||
we may migrate it to GRPC altogether
|
||||
|
||||
Implemented in:
|
||||
1. https://github.com/neondatabase/neon/pull/9746
|
||||
2. https://github.com/neondatabase/neon/pull/9821
|
||||
|
||||
### Phase 3: Fan out interpreted WAL
|
||||
|
||||
In the previous phase, the initial processing of WAL was moved to the safekeeper, but it is still
|
||||
done once for each shard: this will generate O(N_shards) CPU work on the safekeeper (especially
|
||||
when considering converting to Protobuf format and compression).
|
||||
|
||||
To avoid this, we fan-out WAL from one (tenant, timeline, shard) to all other shards subscribed on
|
||||
the same safekeeper. Under normal operation, the WAL will be read from disk, decoded and interpreted
|
||||
_only_ once per (safekeeper, timeline).
|
||||
|
||||
When the first shard of a sharded timeline subscribes to a given safekeeper a task is spawned
|
||||
for the WAL reader (`InterpretedWalReader`). This task reads WAL, decodes, interprets it and sends
|
||||
it to the sender (`InterpretedWalSender`). The sender is a future that is polled from the connection
|
||||
task. When further shards subscribe on the safekeeper they will attach themselves to the existing WAL reader.
|
||||
There's two cases to consider:
|
||||
1. The shard's requested `start_lsn` is ahead of the current position of the WAL reader. In this case, the shard
|
||||
will start receiving data when the reader reaches that LSN. The intuition here is that there's little to gain
|
||||
by letting shards "front-run" since compute backpressure is based on the laggard LSN.
|
||||
2. The shard's requested `start_lsn` is below the current position of the WAL reader. In this case, the WAL reader
|
||||
gets reset to this requested position (same intuition). Special care is taken such that advanced shards do not receive
|
||||
interpreted WAL records below their current position.
|
||||
|
||||
The approach above implies that there is at most one WAL reader per (tenant, timeline) on a given safekeeper at any point in time.
|
||||
If this turns out to be operationally problematic, there's a trick we can deploy: `--max-delta-for-fanout` is an optional safekeeper
|
||||
argument that controls the max absolute delta between a new shard and the current WAL position of the WAL reader. If the absolute
|
||||
delta is above that value, a new reader is spawned. Note that there's currently no concurrency control on the number of WAL readers,
|
||||
so it's recommended to use large values to avoid pushing CPU utilisation too high.
|
||||
|
||||
Unsharded tenants do not spawn a separate task for the interpreted WAL reader since there's no benefit to it. Instead they poll
|
||||
the reader and sender concurrently from the connection task.
|
||||
|
||||
Shard splits are interesting here because it is the only case when the same shard might have two subscriptions at the same time.
|
||||
This is handled by giving readers a unique identifier. Both shards will receive the same data while respecting their requested start
|
||||
position.
|
||||
|
||||
Implemented in:
|
||||
1. https://github.com/neondatabase/neon/pull/10190
|
||||
|
||||
## Deployment
|
||||
|
||||
Each phase shall be deployed independently. Special care should be taken around protocol changes.
|
||||
|
||||
## Observability Tips
|
||||
|
||||
* The safekeeper logs the protocol requested by the pageserver
|
||||
along with the pageserver ID, tenant, timeline and shard: `starting streaming from`.
|
||||
* There's metrics for the number of wal readers:
|
||||
* `safekeeper_wal_readers{kind="task", target=~"pageserver.*"}` gives the number of wal reader tasks for each SK
|
||||
* `safekeeper_wal_readers{kind="future", target=~"pageserver.*"}` gives the numer of wal readers polled inline by each SK
|
||||
* `safekeeper_interpreted_wal_reader_tasks` gives the number of wal reader tasks per tenant, timeline
|
||||
* Interesting log lines for the fan-out reader:
|
||||
* `Spawning interpreted`: first shard creates the interpreted wal reader
|
||||
* `Fanning out`: a subsequent shard attaches itself to an interpreted wal reader
|
||||
* `Aborting interpreted`: all senders have finished and the reader task is being aborted
|
||||
|
||||
## Future Optimizations
|
||||
|
||||
This sections describes some improvement areas which may be revisited in the future.
|
||||
|
||||
### Buffering of Interpreted WAL
|
||||
|
||||
The interpreted WAL reader may buffer interpreted WAL records in user space to help with serving
|
||||
subscriptions that are lagging behind the current position of the reader.
|
||||
|
||||
Counterpoints:
|
||||
* Safekeepers serve many thousands of timelines and allocating a buffer for each might be wasteful,
|
||||
especially given that it would go unused on the happy path.
|
||||
* WAL is buffered in the kernel page cache. Usually we'd only pay the CPU cost of decoding and interpreting.
|
||||
|
||||
### Tweaking the Pagserver Safekeeper Selection Algorithm
|
||||
|
||||
We could make the pageserver aware of which safekeeper's already host shards for the timeline along
|
||||
with their current WAL positions. The pageserver should then prefer safkeepers that are in the same
|
||||
AZ _and_ already have a shard with a position close to the desired start position.
|
||||
|
||||
We currently run one safekeeper per AZ, so the point is mute until that changes.
|
||||
|
||||
### Pipelining first ingest phase
|
||||
|
||||
The first ingest phase is a stateless transformation of a binary WAL record into a pre-processed
|
||||
output per shard. To put multiple CPUs to work, we may pipeline this processing up to some defined buffer
|
||||
depth.
|
||||
|
||||
## Alternatives considered
|
||||
|
||||
### Give safekeepers enough state to fully decode WAL
|
||||
|
||||
In this RFC, we only do the first phase of ingest on the safekeeper, because this is
|
||||
the phase that is stateless. Subsequent changes then happen on the pageserver, with
|
||||
access to the `Timeline` state.
|
||||
|
||||
We could do more work on the safekeeper if we transmitted metadata state to the safekeeper
|
||||
when subscribing to the WAL: for example, by telling the safekeeper all the relation sizes,
|
||||
so that it could then generate all the metadata writes for relation sizes.
|
||||
|
||||
We avoid doing this for several reasons:
|
||||
1. Complexity: it's a more invasive protocol change
|
||||
2. Decoupling: having the safekeeper understand the `ProcessedWalIngest` already somewhat
|
||||
infects it with knowledge of the pageserver, but this is mainly an abstract structure
|
||||
that describes postgres writes. However, if we taught the safekeeper about the exact
|
||||
way that pageserver deals with metadata keys, this would be a much tighter coupling.
|
||||
3. Load: once the WAL has been processed to the point that it can be split between shards,
|
||||
it is preferable to share out work on the remaining shards rather than adding extra CPU
|
||||
load to the safekeeper.
|
||||
|
||||
### Do pre-processing on the compute instead of the safekeeper
|
||||
|
||||
Since our first stage of ingest is stateless, it could be done at any stage in the pipeline,
|
||||
all the way up to the compute.
|
||||
|
||||
We choose not to do this, because it is useful for the safekeeper to store the raw WAL rather
|
||||
than just the preprocessed WAL:
|
||||
- The safekeeper still needs to be able to serve raw WAL back to postgres for e.g. physical replication
|
||||
- It simplifies our paxos implementation to have the offset in the write log be literally
|
||||
the same as the LSN
|
||||
- Raw WAL must have a stable protocol since we might have to re-ingest it at arbitrary points in the future.
|
||||
Storing raw WAL give us more flexibility to evolve the pageserver, safekeeper protocol.
|
||||
|
||||
### Do wal pre-processing on shard 0 or a separate service, send it to other shards from there
|
||||
|
||||
If we wanted to keep the safekeepers as entirely pure stores of raw WAL bytes, then
|
||||
we could do the initial decode and shard-splitting in some other location:
|
||||
- Shard 0 could subscribe to the full WAL and then send writes to other shards
|
||||
- A new intermediate service between the safekeeper and pageserver could do the splitting.
|
||||
|
||||
So why not?
|
||||
- Extra network hop from shard 0 to the final destination shard
|
||||
- Clearly there is more infrastructure involved here compared with doing it inline on the safekeeper.
|
||||
- Safekeepers already have very light CPU load: typical cloud instances shapes with appropriate
|
||||
disks for the safekeepers effectively have "free" CPU resources.
|
||||
- Doing extra work on shard 0 would complicate scheduling of shards on pageservers, because
|
||||
shard 0 would have significantly higher CPU load under write workloads than other shards.
|
||||
@@ -138,13 +138,6 @@ pub struct ComputeSpec {
|
||||
/// enough spare connections for reconfiguration process to succeed.
|
||||
#[serde(default = "default_reconfigure_concurrency")]
|
||||
pub reconfigure_concurrency: usize,
|
||||
|
||||
/// If set to true, the compute_ctl will drop all subscriptions before starting the
|
||||
/// compute. This is needed when we start an endpoint on a branch, so that child
|
||||
/// would not compete with parent branch subscriptions
|
||||
/// over the same replication content from publisher.
|
||||
#[serde(default)] // Default false
|
||||
pub drop_subscriptions_before_start: bool,
|
||||
}
|
||||
|
||||
/// Feature flag to signal `compute_ctl` to enable certain experimental functionality.
|
||||
|
||||
@@ -120,7 +120,6 @@ pub struct ConfigToml {
|
||||
pub no_sync: Option<bool>,
|
||||
pub wal_receiver_protocol: PostgresClientProtocol,
|
||||
pub page_service_pipelining: PageServicePipeliningConfig,
|
||||
pub get_vectored_concurrent_io: GetVectoredConcurrentIo,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
@@ -159,25 +158,6 @@ pub enum PageServiceProtocolPipelinedExecutionStrategy {
|
||||
Tasks,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
#[serde(tag = "mode", rename_all = "kebab-case")]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub enum GetVectoredConcurrentIo {
|
||||
/// The read path is fully sequential: layers are visited
|
||||
/// one after the other and IOs are issued and waited upon
|
||||
/// from the same task that traverses the layers.
|
||||
Sequential,
|
||||
/// The read path still traverses layers sequentially, and
|
||||
/// index blocks will be read into the PS PageCache from
|
||||
/// that task, with waiting.
|
||||
/// But data IOs are dispatched and waited upon from a sidecar
|
||||
/// task so that the traversing task can continue to traverse
|
||||
/// layers while the IOs are in flight.
|
||||
/// If the PS PageCache miss rate is low, this improves
|
||||
/// throughput dramatically.
|
||||
SidecarTask,
|
||||
}
|
||||
|
||||
pub mod statvfs {
|
||||
pub mod mock {
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
@@ -254,18 +234,9 @@ pub struct TenantConfigToml {
|
||||
// Duration::ZERO means automatic compaction is disabled.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub compaction_period: Duration,
|
||||
/// Level0 delta layer threshold for compaction.
|
||||
// Level0 delta layer threshold for compaction.
|
||||
pub compaction_threshold: usize,
|
||||
pub compaction_algorithm: crate::models::CompactionAlgorithmSettings,
|
||||
/// Level0 delta layer threshold at which to delay layer flushes for compaction backpressure,
|
||||
/// such that they take 2x as long, and start waiting for layer flushes during ephemeral layer
|
||||
/// rolls. This helps compaction keep up with WAL ingestion, and avoids read amplification
|
||||
/// blowing up. Should be >compaction_threshold. If None, defaults to 2 * compaction_threshold.
|
||||
/// 0 to disable.
|
||||
pub l0_flush_delay_threshold: Option<usize>,
|
||||
/// Level0 delta layer threshold at which to stall layer flushes. 0 to disable. If None,
|
||||
/// defaults to 4 * compaction_threshold. Must be >compaction_threshold to avoid deadlock.
|
||||
pub l0_flush_stall_threshold: Option<usize>,
|
||||
// Determines how much history is retained, to allow
|
||||
// branching and read replicas at an older point in time.
|
||||
// The unit is #of bytes of WAL.
|
||||
@@ -330,20 +301,6 @@ pub struct TenantConfigToml {
|
||||
pub timeline_offloading: bool,
|
||||
|
||||
pub wal_receiver_protocol_override: Option<PostgresClientProtocol>,
|
||||
|
||||
/// Enable rel_size_v2 for this tenant. Once enabled, the tenant will persist this information into
|
||||
/// `index_part.json`, and it cannot be reversed.
|
||||
pub rel_size_v2_enabled: Option<bool>,
|
||||
|
||||
// gc-compaction related configs
|
||||
/// Enable automatic gc-compaction trigger on this tenant.
|
||||
pub gc_compaction_enabled: bool,
|
||||
/// The initial threshold for gc-compaction in KB. Once the total size of layers below the gc-horizon is above this threshold,
|
||||
/// gc-compaction will be triggered.
|
||||
pub gc_compaction_initial_threshold_kb: u64,
|
||||
/// The ratio that triggers the auto gc-compaction. If (the total size of layers between L2 LSN and gc-horizon) / (size below the L2 LSN)
|
||||
/// is above this ratio, gc-compaction will be triggered.
|
||||
pub gc_compaction_ratio_percent: u64,
|
||||
}
|
||||
|
||||
pub mod defaults {
|
||||
@@ -493,11 +450,6 @@ impl Default for ConfigToml {
|
||||
execution: PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures,
|
||||
})
|
||||
},
|
||||
get_vectored_concurrent_io: if !cfg!(test) {
|
||||
GetVectoredConcurrentIo::Sequential
|
||||
} else {
|
||||
GetVectoredConcurrentIo::SidecarTask
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -542,9 +494,6 @@ pub mod tenant_conf_defaults {
|
||||
// By default ingest enough WAL for two new L0 layers before checking if new image
|
||||
// image layers should be created.
|
||||
pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2;
|
||||
pub const DEFAULT_GC_COMPACTION_ENABLED: bool = false;
|
||||
pub const DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB: u64 = 10240000;
|
||||
pub const DEFAULT_GC_COMPACTION_RATIO_PERCENT: u64 = 100;
|
||||
}
|
||||
|
||||
impl Default for TenantConfigToml {
|
||||
@@ -561,8 +510,6 @@ impl Default for TenantConfigToml {
|
||||
compaction_algorithm: crate::models::CompactionAlgorithmSettings {
|
||||
kind: DEFAULT_COMPACTION_ALGORITHM,
|
||||
},
|
||||
l0_flush_delay_threshold: None,
|
||||
l0_flush_stall_threshold: None,
|
||||
gc_horizon: DEFAULT_GC_HORIZON,
|
||||
gc_period: humantime::parse_duration(DEFAULT_GC_PERIOD)
|
||||
.expect("cannot parse default gc period"),
|
||||
@@ -591,10 +538,6 @@ impl Default for TenantConfigToml {
|
||||
lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS,
|
||||
timeline_offloading: false,
|
||||
wal_receiver_protocol_override: None,
|
||||
rel_size_v2_enabled: None,
|
||||
gc_compaction_enabled: DEFAULT_GC_COMPACTION_ENABLED,
|
||||
gc_compaction_initial_threshold_kb: DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB,
|
||||
gc_compaction_ratio_percent: DEFAULT_GC_COMPACTION_RATIO_PERCENT,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -324,7 +324,7 @@ impl From<NodeSchedulingPolicy> for String {
|
||||
#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, Debug)]
|
||||
pub enum SkSchedulingPolicy {
|
||||
Active,
|
||||
Pause,
|
||||
Disabled,
|
||||
Decomissioned,
|
||||
}
|
||||
|
||||
@@ -334,13 +334,9 @@ impl FromStr for SkSchedulingPolicy {
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(match s {
|
||||
"active" => Self::Active,
|
||||
"pause" => Self::Pause,
|
||||
"disabled" => Self::Disabled,
|
||||
"decomissioned" => Self::Decomissioned,
|
||||
_ => {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Unknown scheduling policy '{s}', try active,pause,decomissioned"
|
||||
))
|
||||
}
|
||||
_ => return Err(anyhow::anyhow!("Unknown scheduling state '{s}'")),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -350,7 +346,7 @@ impl From<SkSchedulingPolicy> for String {
|
||||
use SkSchedulingPolicy::*;
|
||||
match value {
|
||||
Active => "active",
|
||||
Pause => "pause",
|
||||
Disabled => "disabled",
|
||||
Decomissioned => "decomissioned",
|
||||
}
|
||||
.to_string()
|
||||
|
||||
@@ -33,6 +33,7 @@ use crate::{
|
||||
reltag::RelTag,
|
||||
shard::{ShardCount, ShardStripeSize, TenantShardId},
|
||||
};
|
||||
use anyhow::bail;
|
||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
|
||||
/// The state of a tenant in this pageserver.
|
||||
@@ -462,10 +463,6 @@ pub struct TenantConfigPatch {
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub compaction_algorithm: FieldPatch<CompactionAlgorithmSettings>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub l0_flush_delay_threshold: FieldPatch<usize>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub l0_flush_stall_threshold: FieldPatch<usize>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub gc_horizon: FieldPatch<u64>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub gc_period: FieldPatch<String>,
|
||||
@@ -501,14 +498,6 @@ pub struct TenantConfigPatch {
|
||||
pub timeline_offloading: FieldPatch<bool>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub wal_receiver_protocol_override: FieldPatch<PostgresClientProtocol>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub rel_size_v2_enabled: FieldPatch<bool>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub gc_compaction_enabled: FieldPatch<bool>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub gc_compaction_initial_threshold_kb: FieldPatch<u64>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub gc_compaction_ratio_percent: FieldPatch<u64>,
|
||||
}
|
||||
|
||||
/// An alternative representation of `pageserver::tenant::TenantConf` with
|
||||
@@ -522,8 +511,6 @@ pub struct TenantConfig {
|
||||
pub compaction_threshold: Option<usize>,
|
||||
// defer parsing compaction_algorithm, like eviction_policy
|
||||
pub compaction_algorithm: Option<CompactionAlgorithmSettings>,
|
||||
pub l0_flush_delay_threshold: Option<usize>,
|
||||
pub l0_flush_stall_threshold: Option<usize>,
|
||||
pub gc_horizon: Option<u64>,
|
||||
pub gc_period: Option<String>,
|
||||
pub image_creation_threshold: Option<usize>,
|
||||
@@ -542,10 +529,6 @@ pub struct TenantConfig {
|
||||
pub lsn_lease_length_for_ts: Option<String>,
|
||||
pub timeline_offloading: Option<bool>,
|
||||
pub wal_receiver_protocol_override: Option<PostgresClientProtocol>,
|
||||
pub rel_size_v2_enabled: Option<bool>,
|
||||
pub gc_compaction_enabled: Option<bool>,
|
||||
pub gc_compaction_initial_threshold_kb: Option<u64>,
|
||||
pub gc_compaction_ratio_percent: Option<u64>,
|
||||
}
|
||||
|
||||
impl TenantConfig {
|
||||
@@ -557,8 +540,6 @@ impl TenantConfig {
|
||||
mut compaction_period,
|
||||
mut compaction_threshold,
|
||||
mut compaction_algorithm,
|
||||
mut l0_flush_delay_threshold,
|
||||
mut l0_flush_stall_threshold,
|
||||
mut gc_horizon,
|
||||
mut gc_period,
|
||||
mut image_creation_threshold,
|
||||
@@ -577,10 +558,6 @@ impl TenantConfig {
|
||||
mut lsn_lease_length_for_ts,
|
||||
mut timeline_offloading,
|
||||
mut wal_receiver_protocol_override,
|
||||
mut rel_size_v2_enabled,
|
||||
mut gc_compaction_enabled,
|
||||
mut gc_compaction_initial_threshold_kb,
|
||||
mut gc_compaction_ratio_percent,
|
||||
} = self;
|
||||
|
||||
patch.checkpoint_distance.apply(&mut checkpoint_distance);
|
||||
@@ -591,12 +568,6 @@ impl TenantConfig {
|
||||
patch.compaction_period.apply(&mut compaction_period);
|
||||
patch.compaction_threshold.apply(&mut compaction_threshold);
|
||||
patch.compaction_algorithm.apply(&mut compaction_algorithm);
|
||||
patch
|
||||
.l0_flush_delay_threshold
|
||||
.apply(&mut l0_flush_delay_threshold);
|
||||
patch
|
||||
.l0_flush_stall_threshold
|
||||
.apply(&mut l0_flush_stall_threshold);
|
||||
patch.gc_horizon.apply(&mut gc_horizon);
|
||||
patch.gc_period.apply(&mut gc_period);
|
||||
patch
|
||||
@@ -631,16 +602,6 @@ impl TenantConfig {
|
||||
patch
|
||||
.wal_receiver_protocol_override
|
||||
.apply(&mut wal_receiver_protocol_override);
|
||||
patch.rel_size_v2_enabled.apply(&mut rel_size_v2_enabled);
|
||||
patch
|
||||
.gc_compaction_enabled
|
||||
.apply(&mut gc_compaction_enabled);
|
||||
patch
|
||||
.gc_compaction_initial_threshold_kb
|
||||
.apply(&mut gc_compaction_initial_threshold_kb);
|
||||
patch
|
||||
.gc_compaction_ratio_percent
|
||||
.apply(&mut gc_compaction_ratio_percent);
|
||||
|
||||
Self {
|
||||
checkpoint_distance,
|
||||
@@ -649,8 +610,6 @@ impl TenantConfig {
|
||||
compaction_period,
|
||||
compaction_threshold,
|
||||
compaction_algorithm,
|
||||
l0_flush_delay_threshold,
|
||||
l0_flush_stall_threshold,
|
||||
gc_horizon,
|
||||
gc_period,
|
||||
image_creation_threshold,
|
||||
@@ -669,10 +628,6 @@ impl TenantConfig {
|
||||
lsn_lease_length_for_ts,
|
||||
timeline_offloading,
|
||||
wal_receiver_protocol_override,
|
||||
rel_size_v2_enabled,
|
||||
gc_compaction_enabled,
|
||||
gc_compaction_initial_threshold_kb,
|
||||
gc_compaction_ratio_percent,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1445,8 +1400,6 @@ pub enum PagestreamFeMessage {
|
||||
GetPage(PagestreamGetPageRequest),
|
||||
DbSize(PagestreamDbSizeRequest),
|
||||
GetSlruSegment(PagestreamGetSlruSegmentRequest),
|
||||
#[cfg(feature = "testing")]
|
||||
Test(PagestreamTestRequest),
|
||||
}
|
||||
|
||||
// Wrapped in libpq CopyData
|
||||
@@ -1458,22 +1411,6 @@ pub enum PagestreamBeMessage {
|
||||
Error(PagestreamErrorResponse),
|
||||
DbSize(PagestreamDbSizeResponse),
|
||||
GetSlruSegment(PagestreamGetSlruSegmentResponse),
|
||||
#[cfg(feature = "testing")]
|
||||
Test(PagestreamTestResponse),
|
||||
}
|
||||
|
||||
// Keep in sync with `pagestore_client.h`
|
||||
#[repr(u8)]
|
||||
enum PagestreamFeMessageTag {
|
||||
Exists = 0,
|
||||
Nblocks = 1,
|
||||
GetPage = 2,
|
||||
DbSize = 3,
|
||||
GetSlruSegment = 4,
|
||||
/* future tags above this line */
|
||||
/// For testing purposes, not available in production.
|
||||
#[cfg(feature = "testing")]
|
||||
Test = 99,
|
||||
}
|
||||
|
||||
// Keep in sync with `pagestore_client.h`
|
||||
@@ -1485,28 +1422,7 @@ enum PagestreamBeMessageTag {
|
||||
Error = 103,
|
||||
DbSize = 104,
|
||||
GetSlruSegment = 105,
|
||||
/* future tags above this line */
|
||||
/// For testing purposes, not available in production.
|
||||
#[cfg(feature = "testing")]
|
||||
Test = 199,
|
||||
}
|
||||
|
||||
impl TryFrom<u8> for PagestreamFeMessageTag {
|
||||
type Error = u8;
|
||||
fn try_from(value: u8) -> Result<Self, u8> {
|
||||
match value {
|
||||
0 => Ok(PagestreamFeMessageTag::Exists),
|
||||
1 => Ok(PagestreamFeMessageTag::Nblocks),
|
||||
2 => Ok(PagestreamFeMessageTag::GetPage),
|
||||
3 => Ok(PagestreamFeMessageTag::DbSize),
|
||||
4 => Ok(PagestreamFeMessageTag::GetSlruSegment),
|
||||
#[cfg(feature = "testing")]
|
||||
99 => Ok(PagestreamFeMessageTag::Test),
|
||||
_ => Err(value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<u8> for PagestreamBeMessageTag {
|
||||
type Error = u8;
|
||||
fn try_from(value: u8) -> Result<Self, u8> {
|
||||
@@ -1517,8 +1433,6 @@ impl TryFrom<u8> for PagestreamBeMessageTag {
|
||||
103 => Ok(PagestreamBeMessageTag::Error),
|
||||
104 => Ok(PagestreamBeMessageTag::DbSize),
|
||||
105 => Ok(PagestreamBeMessageTag::GetSlruSegment),
|
||||
#[cfg(feature = "testing")]
|
||||
199 => Ok(PagestreamBeMessageTag::Test),
|
||||
_ => Err(value),
|
||||
}
|
||||
}
|
||||
@@ -1636,20 +1550,6 @@ pub struct PagestreamDbSizeResponse {
|
||||
pub db_size: i64,
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||
pub struct PagestreamTestRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub batch_key: u64,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamTestResponse {
|
||||
pub req: PagestreamTestRequest,
|
||||
}
|
||||
|
||||
// This is a cut-down version of TenantHistorySize from the pageserver crate, omitting fields
|
||||
// that require pageserver-internal types. It is sufficient to get the total size.
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
@@ -1669,7 +1569,7 @@ impl PagestreamFeMessage {
|
||||
|
||||
match self {
|
||||
Self::Exists(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::Exists as u8);
|
||||
bytes.put_u8(0);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
@@ -1680,7 +1580,7 @@ impl PagestreamFeMessage {
|
||||
}
|
||||
|
||||
Self::Nblocks(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::Nblocks as u8);
|
||||
bytes.put_u8(1);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
@@ -1691,7 +1591,7 @@ impl PagestreamFeMessage {
|
||||
}
|
||||
|
||||
Self::GetPage(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::GetPage as u8);
|
||||
bytes.put_u8(2);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
@@ -1703,7 +1603,7 @@ impl PagestreamFeMessage {
|
||||
}
|
||||
|
||||
Self::DbSize(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::DbSize as u8);
|
||||
bytes.put_u8(3);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
@@ -1711,24 +1611,13 @@ impl PagestreamFeMessage {
|
||||
}
|
||||
|
||||
Self::GetSlruSegment(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::GetSlruSegment as u8);
|
||||
bytes.put_u8(4);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u8(req.kind);
|
||||
bytes.put_u32(req.segno);
|
||||
}
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::Test as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u64(req.batch_key);
|
||||
let message = req.message.as_bytes();
|
||||
bytes.put_u64(message.len() as u64);
|
||||
bytes.put_slice(message);
|
||||
}
|
||||
}
|
||||
|
||||
bytes.into()
|
||||
@@ -1756,66 +1645,56 @@ impl PagestreamFeMessage {
|
||||
),
|
||||
};
|
||||
|
||||
match PagestreamFeMessageTag::try_from(msg_tag)
|
||||
.map_err(|tag: u8| anyhow::anyhow!("invalid tag {tag}"))?
|
||||
{
|
||||
PagestreamFeMessageTag::Exists => {
|
||||
Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::Nblocks => {
|
||||
Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::GetPage => {
|
||||
Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
blkno: body.read_u32::<BigEndian>()?,
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::DbSize => {
|
||||
Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
match msg_tag {
|
||||
0 => Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::GetSlruSegment => Ok(PagestreamFeMessage::GetSlruSegment(
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
})),
|
||||
1 => Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
})),
|
||||
2 => Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
blkno: body.read_u32::<BigEndian>()?,
|
||||
})),
|
||||
3 => Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
})),
|
||||
4 => Ok(PagestreamFeMessage::GetSlruSegment(
|
||||
PagestreamGetSlruSegmentRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
@@ -1826,21 +1705,7 @@ impl PagestreamFeMessage {
|
||||
segno: body.read_u32::<BigEndian>()?,
|
||||
},
|
||||
)),
|
||||
#[cfg(feature = "testing")]
|
||||
PagestreamFeMessageTag::Test => Ok(PagestreamFeMessage::Test(PagestreamTestRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
batch_key: body.read_u64::<BigEndian>()?,
|
||||
message: {
|
||||
let len = body.read_u64::<BigEndian>()?;
|
||||
let mut buf = vec![0; len as usize];
|
||||
body.read_exact(&mut buf)?;
|
||||
String::from_utf8(buf)?
|
||||
},
|
||||
})),
|
||||
_ => bail!("unknown smgr message tag: {:?}", msg_tag),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1883,15 +1748,6 @@ impl PagestreamBeMessage {
|
||||
bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
|
||||
bytes.put(&resp.segment[..]);
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(resp) => {
|
||||
bytes.put_u8(Tag::Test as u8);
|
||||
bytes.put_u64(resp.req.batch_key);
|
||||
let message = resp.req.message.as_bytes();
|
||||
bytes.put_u64(message.len() as u64);
|
||||
bytes.put_slice(message);
|
||||
}
|
||||
}
|
||||
}
|
||||
PagestreamProtocolVersion::V3 => {
|
||||
@@ -1960,18 +1816,6 @@ impl PagestreamBeMessage {
|
||||
bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
|
||||
bytes.put(&resp.segment[..]);
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(resp) => {
|
||||
bytes.put_u8(Tag::Test as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u64(resp.req.batch_key);
|
||||
let message = resp.req.message.as_bytes();
|
||||
bytes.put_u64(message.len() as u64);
|
||||
bytes.put_slice(message);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2114,28 +1958,6 @@ impl PagestreamBeMessage {
|
||||
segment: segment.into(),
|
||||
})
|
||||
}
|
||||
#[cfg(feature = "testing")]
|
||||
Tag::Test => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let batch_key = buf.read_u64::<BigEndian>()?;
|
||||
let len = buf.read_u64::<BigEndian>()?;
|
||||
let mut msg = vec![0; len as usize];
|
||||
buf.read_exact(&mut msg)?;
|
||||
let message = String::from_utf8(msg)?;
|
||||
Self::Test(PagestreamTestResponse {
|
||||
req: PagestreamTestRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
batch_key,
|
||||
message,
|
||||
},
|
||||
})
|
||||
}
|
||||
};
|
||||
let remaining = buf.into_inner();
|
||||
if !remaining.is_empty() {
|
||||
@@ -2155,8 +1977,6 @@ impl PagestreamBeMessage {
|
||||
Self::Error(_) => "Error",
|
||||
Self::DbSize(_) => "DbSize",
|
||||
Self::GetSlruSegment(_) => "GetSlruSegment",
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(_) => "Test",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -277,8 +277,3 @@ pub struct TimelineTermBumpResponse {
|
||||
pub previous_term: u64,
|
||||
pub current_term: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct SafekeeperUtilization {
|
||||
pub timeline_count: u64,
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
|
||||
use std::{fmt::Display, str::FromStr};
|
||||
|
||||
/// For types `V` that implement [`FromStr`].
|
||||
pub fn var<V, E>(varname: &str) -> Option<V>
|
||||
where
|
||||
V: FromStr<Err = E>,
|
||||
@@ -11,9 +10,7 @@ where
|
||||
match std::env::var(varname) {
|
||||
Ok(s) => Some(
|
||||
s.parse()
|
||||
.map_err(|e| {
|
||||
format!("failed to parse env var {varname} using FromStr::parse: {e:#}")
|
||||
})
|
||||
.map_err(|e| format!("failed to parse env var {varname}: {e:#}"))
|
||||
.unwrap(),
|
||||
),
|
||||
Err(std::env::VarError::NotPresent) => None,
|
||||
@@ -22,24 +19,3 @@ where
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// For types `V` that implement [`serde::de::DeserializeOwned`].
|
||||
pub fn var_serde_json_string<V>(varname: &str) -> Option<V>
|
||||
where
|
||||
V: serde::de::DeserializeOwned,
|
||||
{
|
||||
match std::env::var(varname) {
|
||||
Ok(s) => Some({
|
||||
let value = serde_json::Value::String(s);
|
||||
serde_json::from_value(value)
|
||||
.map_err(|e| {
|
||||
format!("failed to parse env var {varname} as a serde_json json string: {e:#}")
|
||||
})
|
||||
.unwrap()
|
||||
}),
|
||||
Err(std::env::VarError::NotPresent) => None,
|
||||
Err(std::env::VarError::NotUnicode(_)) => {
|
||||
panic!("env var {varname} is not unicode")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,55 +11,31 @@ use tracing::*;
|
||||
|
||||
/// Declare a failpoint that can use to `pause` failpoint action.
|
||||
/// We don't want to block the executor thread, hence, spawn_blocking + await.
|
||||
///
|
||||
/// Optionally pass a cancellation token, and this failpoint will drop out of
|
||||
/// its pause when the cancellation token fires. This is useful for testing
|
||||
/// cases where we would like to block something, but test its clean shutdown behavior.
|
||||
/// The macro evaluates to a Result in that case, where Ok(()) is the case
|
||||
/// where the failpoint was not paused, and Err() is the case where cancellation
|
||||
/// token fired while evaluating the failpoint.
|
||||
///
|
||||
/// Remember to unpause the failpoint in the test; until that happens, one of the
|
||||
/// limited number of spawn_blocking thread pool threads is leaked.
|
||||
#[macro_export]
|
||||
macro_rules! pausable_failpoint {
|
||||
($name:literal) => {{
|
||||
($name:literal) => {
|
||||
if cfg!(feature = "testing") {
|
||||
let cancel = ::tokio_util::sync::CancellationToken::new();
|
||||
let _ = $crate::pausable_failpoint!($name, &cancel);
|
||||
}
|
||||
}};
|
||||
($name:literal, $cancel:expr) => {{
|
||||
if cfg!(feature = "testing") {
|
||||
let failpoint_fut = ::tokio::task::spawn_blocking({
|
||||
let current = ::tracing::Span::current();
|
||||
tokio::task::spawn_blocking({
|
||||
let current = tracing::Span::current();
|
||||
move || {
|
||||
let _entered = current.entered();
|
||||
::tracing::info!("at failpoint {}", $name);
|
||||
::fail::fail_point!($name);
|
||||
tracing::info!("at failpoint {}", $name);
|
||||
fail::fail_point!($name);
|
||||
}
|
||||
});
|
||||
let cancel_fut = async move {
|
||||
$cancel.cancelled().await;
|
||||
};
|
||||
::tokio::select! {
|
||||
res = failpoint_fut => {
|
||||
res.expect("spawn_blocking");
|
||||
// continue with execution
|
||||
Ok(())
|
||||
},
|
||||
_ = cancel_fut => {
|
||||
Err(())
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
.expect("spawn_blocking");
|
||||
}
|
||||
}};
|
||||
};
|
||||
($name:literal, $cond:expr) => {
|
||||
if cfg!(feature = "testing") {
|
||||
if $cond {
|
||||
pausable_failpoint!($name)
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
pub use pausable_failpoint;
|
||||
|
||||
/// use with fail::cfg("$name", "return(2000)")
|
||||
///
|
||||
/// The effect is similar to a "sleep(2000)" action, i.e. we sleep for the
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
//! A wrapper around `ArcSwap` that ensures there is only one writer at a time and writes
|
||||
//! don't block reads.
|
||||
|
||||
use arc_swap::ArcSwap;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::TryLockError;
|
||||
|
||||
pub struct GuardArcSwap<T> {
|
||||
inner: ArcSwap<T>,
|
||||
guard: tokio::sync::Mutex<()>,
|
||||
}
|
||||
|
||||
pub struct Guard<'a, T> {
|
||||
_guard: tokio::sync::MutexGuard<'a, ()>,
|
||||
inner: &'a ArcSwap<T>,
|
||||
}
|
||||
|
||||
impl<T> GuardArcSwap<T> {
|
||||
pub fn new(inner: T) -> Self {
|
||||
Self {
|
||||
inner: ArcSwap::new(Arc::new(inner)),
|
||||
guard: tokio::sync::Mutex::new(()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn read(&self) -> Arc<T> {
|
||||
self.inner.load_full()
|
||||
}
|
||||
|
||||
pub async fn write_guard(&self) -> Guard<'_, T> {
|
||||
Guard {
|
||||
_guard: self.guard.lock().await,
|
||||
inner: &self.inner,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_write_guard(&self) -> Result<Guard<'_, T>, TryLockError> {
|
||||
let guard = self.guard.try_lock()?;
|
||||
Ok(Guard {
|
||||
_guard: guard,
|
||||
inner: &self.inner,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Guard<'_, T> {
|
||||
pub fn read(&self) -> Arc<T> {
|
||||
self.inner.load_full()
|
||||
}
|
||||
|
||||
pub fn write(&mut self, value: T) {
|
||||
self.inner.store(Arc::new(value));
|
||||
}
|
||||
}
|
||||
@@ -98,8 +98,6 @@ pub mod try_rcu;
|
||||
|
||||
pub mod pprof;
|
||||
|
||||
pub mod guard_arc_swap;
|
||||
|
||||
// Re-export used in macro. Avoids adding git-version as dep in target crates.
|
||||
#[doc(hidden)]
|
||||
pub use git_version;
|
||||
|
||||
@@ -64,12 +64,6 @@ pub struct GateGuard {
|
||||
gate: Arc<GateInner>,
|
||||
}
|
||||
|
||||
impl GateGuard {
|
||||
pub fn try_clone(&self) -> Result<Self, GateError> {
|
||||
Gate::enter_impl(self.gate.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for GateGuard {
|
||||
fn drop(&mut self) {
|
||||
if self.gate.closing.load(Ordering::Relaxed) {
|
||||
@@ -113,11 +107,11 @@ impl Gate {
|
||||
/// to avoid blocking close() indefinitely: typically types that contain a Gate will
|
||||
/// also contain a CancellationToken.
|
||||
pub fn enter(&self) -> Result<GateGuard, GateError> {
|
||||
Self::enter_impl(self.inner.clone())
|
||||
}
|
||||
|
||||
fn enter_impl(gate: Arc<GateInner>) -> Result<GateGuard, GateError> {
|
||||
let permit = gate.sem.try_acquire().map_err(|_| GateError::GateClosed)?;
|
||||
let permit = self
|
||||
.inner
|
||||
.sem
|
||||
.try_acquire()
|
||||
.map_err(|_| GateError::GateClosed)?;
|
||||
|
||||
// we now have the permit, let's disable the normal raii functionality and leave
|
||||
// "returning" the permit to our GateGuard::drop.
|
||||
@@ -128,7 +122,7 @@ impl Gate {
|
||||
|
||||
Ok(GateGuard {
|
||||
span_at_enter: tracing::Span::current(),
|
||||
gate,
|
||||
gate: self.inner.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -258,39 +252,4 @@ mod tests {
|
||||
// Attempting to enter() is still forbidden
|
||||
gate.enter().expect_err("enter should fail finishing close");
|
||||
}
|
||||
|
||||
#[tokio::test(start_paused = true)]
|
||||
async fn clone_gate_guard() {
|
||||
let gate = Gate::default();
|
||||
let forever = Duration::from_secs(24 * 7 * 365);
|
||||
|
||||
let guard1 = gate.enter().expect("gate isn't closed");
|
||||
|
||||
let guard2 = guard1.try_clone().expect("gate isn't clsoed");
|
||||
|
||||
let mut close_fut = std::pin::pin!(gate.close());
|
||||
|
||||
tokio::time::timeout(forever, &mut close_fut)
|
||||
.await
|
||||
.unwrap_err();
|
||||
|
||||
// we polled close_fut once, that should prevent all later enters and clones
|
||||
gate.enter().unwrap_err();
|
||||
guard1.try_clone().unwrap_err();
|
||||
guard2.try_clone().unwrap_err();
|
||||
|
||||
// guard2 keeps gate open even if guard1 is closed
|
||||
drop(guard1);
|
||||
tokio::time::timeout(forever, &mut close_fut)
|
||||
.await
|
||||
.unwrap_err();
|
||||
|
||||
drop(guard2);
|
||||
|
||||
// now that the last guard is dropped, closing should complete
|
||||
close_fut.await;
|
||||
|
||||
// entering is still forbidden
|
||||
gate.enter().expect_err("enter should stilll fail");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ postgres_ffi.workspace = true
|
||||
serde.workspace = true
|
||||
thiserror.workspace = true
|
||||
tokio = { workspace = true, features = ["io-util"] }
|
||||
tonic.workspace = true
|
||||
tracing.workspace = true
|
||||
utils.workspace = true
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
|
||||
@@ -45,7 +45,7 @@ pub mod proto {
|
||||
#![allow(clippy::derive_partial_eq_without_eq)]
|
||||
// The generated ValueMeta has a `len` method generate for its `len` field.
|
||||
#![allow(clippy::len_without_is_empty)]
|
||||
include!(concat!(env!("OUT_DIR"), concat!("/interpreted_wal.rs")));
|
||||
tonic::include_proto!("interpreted_wal");
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Serialize, Deserialize)]
|
||||
|
||||
@@ -8,7 +8,7 @@ license.workspace = true
|
||||
default = []
|
||||
# Enables test-only APIs, incuding failpoints. In particular, enables the `fail_point!` macro,
|
||||
# which adds some runtime cost to run tests on outage conditions
|
||||
testing = ["fail/failpoints", "pageserver_api/testing", "wal_decoder/testing", "pageserver_client/testing"]
|
||||
testing = ["fail/failpoints", "pageserver_api/testing", "wal_decoder/testing"]
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
@@ -114,7 +114,3 @@ harness = false
|
||||
[[bench]]
|
||||
name = "upload_queue"
|
||||
harness = false
|
||||
|
||||
[[bin]]
|
||||
name = "test_helper_slow_client_reads"
|
||||
required-features = [ "testing" ]
|
||||
|
||||
@@ -4,9 +4,6 @@ version = "0.1.0"
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[features]
|
||||
testing = [ "pageserver_api/testing" ]
|
||||
|
||||
[dependencies]
|
||||
pageserver_api.workspace = true
|
||||
thiserror.workspace = true
|
||||
|
||||
@@ -1,9 +1,6 @@
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::pin::Pin;
|
||||
|
||||
use futures::{
|
||||
stream::{SplitSink, SplitStream},
|
||||
SinkExt, StreamExt,
|
||||
};
|
||||
use futures::SinkExt;
|
||||
use pageserver_api::{
|
||||
models::{
|
||||
PagestreamBeMessage, PagestreamFeMessage, PagestreamGetPageRequest,
|
||||
@@ -13,6 +10,7 @@ use pageserver_api::{
|
||||
};
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio_postgres::CopyOutStream;
|
||||
use tokio_stream::StreamExt;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use utils::{
|
||||
id::{TenantId, TimelineId},
|
||||
@@ -64,28 +62,15 @@ impl Client {
|
||||
.client
|
||||
.copy_both_simple(&format!("pagestream_v3 {tenant_id} {timeline_id}"))
|
||||
.await?;
|
||||
let (sink, stream) = copy_both.split(); // TODO: actually support splitting of the CopyBothDuplex so the lock inside this split adaptor goes away.
|
||||
let Client {
|
||||
cancel_on_client_drop,
|
||||
conn_task,
|
||||
client: _,
|
||||
} = self;
|
||||
let shared = Arc::new(Mutex::new(PagestreamShared::ConnTaskRunning(
|
||||
ConnTaskRunning {
|
||||
cancel_on_client_drop,
|
||||
conn_task,
|
||||
},
|
||||
)));
|
||||
Ok(PagestreamClient {
|
||||
sink: PagestreamSender {
|
||||
shared: shared.clone(),
|
||||
sink,
|
||||
},
|
||||
stream: PagestreamReceiver {
|
||||
shared: shared.clone(),
|
||||
stream,
|
||||
},
|
||||
shared,
|
||||
copy_both: Box::pin(copy_both),
|
||||
conn_task,
|
||||
cancel_on_client_drop,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -112,28 +97,7 @@ impl Client {
|
||||
|
||||
/// Create using [`Client::pagestream`].
|
||||
pub struct PagestreamClient {
|
||||
shared: Arc<Mutex<PagestreamShared>>,
|
||||
sink: PagestreamSender,
|
||||
stream: PagestreamReceiver,
|
||||
}
|
||||
|
||||
pub struct PagestreamSender {
|
||||
#[allow(dead_code)]
|
||||
shared: Arc<Mutex<PagestreamShared>>,
|
||||
sink: SplitSink<tokio_postgres::CopyBothDuplex<bytes::Bytes>, bytes::Bytes>,
|
||||
}
|
||||
|
||||
pub struct PagestreamReceiver {
|
||||
#[allow(dead_code)]
|
||||
shared: Arc<Mutex<PagestreamShared>>,
|
||||
stream: SplitStream<tokio_postgres::CopyBothDuplex<bytes::Bytes>>,
|
||||
}
|
||||
|
||||
enum PagestreamShared {
|
||||
ConnTaskRunning(ConnTaskRunning),
|
||||
ConnTaskCancelledJoinHandleReturnedOrDropped,
|
||||
}
|
||||
struct ConnTaskRunning {
|
||||
copy_both: Pin<Box<tokio_postgres::CopyBothDuplex<bytes::Bytes>>>,
|
||||
cancel_on_client_drop: Option<tokio_util::sync::DropGuard>,
|
||||
conn_task: JoinHandle<()>,
|
||||
}
|
||||
@@ -146,11 +110,11 @@ pub struct RelTagBlockNo {
|
||||
impl PagestreamClient {
|
||||
pub async fn shutdown(self) {
|
||||
let Self {
|
||||
shared,
|
||||
sink,
|
||||
stream,
|
||||
} = { self };
|
||||
// The `copy_both` split into `sink` and `stream` contains internal channel sender, the receiver of which is polled by `conn_task`.
|
||||
copy_both,
|
||||
cancel_on_client_drop: cancel_conn_task,
|
||||
conn_task,
|
||||
} = self;
|
||||
// The `copy_both` contains internal channel sender, the receiver of which is polled by `conn_task`.
|
||||
// When `conn_task` observes the sender has been dropped, it sends a `FeMessage::CopyFail` into the connection.
|
||||
// (see https://github.com/neondatabase/rust-postgres/blob/2005bf79573b8add5cf205b52a2b208e356cc8b0/tokio-postgres/src/copy_both.rs#L56).
|
||||
//
|
||||
@@ -167,77 +131,27 @@ impl PagestreamClient {
|
||||
//
|
||||
// NB: page_service doesn't have a use case to exit the `pagestream` mode currently.
|
||||
// => https://github.com/neondatabase/neon/issues/6390
|
||||
let ConnTaskRunning {
|
||||
cancel_on_client_drop,
|
||||
conn_task,
|
||||
} = {
|
||||
let mut guard = shared.lock().unwrap();
|
||||
match std::mem::replace(
|
||||
&mut *guard,
|
||||
PagestreamShared::ConnTaskCancelledJoinHandleReturnedOrDropped,
|
||||
) {
|
||||
PagestreamShared::ConnTaskRunning(conn_task_running) => conn_task_running,
|
||||
PagestreamShared::ConnTaskCancelledJoinHandleReturnedOrDropped => unreachable!(),
|
||||
}
|
||||
};
|
||||
let _ = cancel_on_client_drop.unwrap();
|
||||
let _ = cancel_conn_task.unwrap();
|
||||
conn_task.await.unwrap();
|
||||
|
||||
// Now drop the split copy_both.
|
||||
drop(sink);
|
||||
drop(stream);
|
||||
}
|
||||
|
||||
pub fn split(self) -> (PagestreamSender, PagestreamReceiver) {
|
||||
let Self {
|
||||
shared: _,
|
||||
sink,
|
||||
stream,
|
||||
} = self;
|
||||
(sink, stream)
|
||||
drop(copy_both);
|
||||
}
|
||||
|
||||
pub async fn getpage(
|
||||
&mut self,
|
||||
req: PagestreamGetPageRequest,
|
||||
) -> anyhow::Result<PagestreamGetPageResponse> {
|
||||
self.getpage_send(req).await?;
|
||||
self.getpage_recv().await
|
||||
}
|
||||
let req = PagestreamFeMessage::GetPage(req);
|
||||
let req: bytes::Bytes = req.serialize();
|
||||
// let mut req = tokio_util::io::ReaderStream::new(&req);
|
||||
let mut req = tokio_stream::once(Ok(req));
|
||||
|
||||
pub async fn getpage_send(&mut self, req: PagestreamGetPageRequest) -> anyhow::Result<()> {
|
||||
self.sink.getpage_send(req).await
|
||||
}
|
||||
self.copy_both.send_all(&mut req).await?;
|
||||
|
||||
pub async fn getpage_recv(&mut self) -> anyhow::Result<PagestreamGetPageResponse> {
|
||||
self.stream.getpage_recv().await
|
||||
}
|
||||
}
|
||||
|
||||
impl PagestreamSender {
|
||||
// TODO: maybe make this impl Sink instead for better composability?
|
||||
pub async fn send(&mut self, msg: PagestreamFeMessage) -> anyhow::Result<()> {
|
||||
let msg = msg.serialize();
|
||||
self.sink.send_all(&mut tokio_stream::once(Ok(msg))).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn getpage_send(&mut self, req: PagestreamGetPageRequest) -> anyhow::Result<()> {
|
||||
self.send(PagestreamFeMessage::GetPage(req)).await
|
||||
}
|
||||
}
|
||||
|
||||
impl PagestreamReceiver {
|
||||
// TODO: maybe make this impl Stream instead for better composability?
|
||||
pub async fn recv(&mut self) -> anyhow::Result<PagestreamBeMessage> {
|
||||
let next: Option<Result<bytes::Bytes, _>> = self.stream.next().await;
|
||||
let next: Option<Result<bytes::Bytes, _>> = self.copy_both.next().await;
|
||||
let next: bytes::Bytes = next.unwrap()?;
|
||||
PagestreamBeMessage::deserialize(next)
|
||||
}
|
||||
|
||||
pub async fn getpage_recv(&mut self) -> anyhow::Result<PagestreamGetPageResponse> {
|
||||
let next: PagestreamBeMessage = self.recv().await?;
|
||||
match next {
|
||||
let msg = PagestreamBeMessage::deserialize(next)?;
|
||||
match msg {
|
||||
PagestreamBeMessage::GetPage(p) => Ok(p),
|
||||
PagestreamBeMessage::Error(e) => anyhow::bail!("Error: {:?}", e),
|
||||
PagestreamBeMessage::Exists(_)
|
||||
@@ -246,14 +160,7 @@ impl PagestreamReceiver {
|
||||
| PagestreamBeMessage::GetSlruSegment(_) => {
|
||||
anyhow::bail!(
|
||||
"unexpected be message kind in response to getpage request: {}",
|
||||
next.kind()
|
||||
)
|
||||
}
|
||||
#[cfg(feature = "testing")]
|
||||
PagestreamBeMessage::Test(_) => {
|
||||
anyhow::bail!(
|
||||
"unexpected be message kind in response to getpage request: {}",
|
||||
next.kind()
|
||||
msg.kind()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ use rand::prelude::*;
|
||||
use tokio::task::JoinSet;
|
||||
use tracing::info;
|
||||
|
||||
use std::collections::{HashSet, VecDeque};
|
||||
use std::collections::HashSet;
|
||||
use std::future::Future;
|
||||
use std::num::NonZeroUsize;
|
||||
use std::pin::Pin;
|
||||
@@ -63,10 +63,6 @@ pub(crate) struct Args {
|
||||
#[clap(long)]
|
||||
set_io_mode: Option<pageserver_api::models::virtual_file::IoMode>,
|
||||
|
||||
/// Queue depth generated in each client.
|
||||
#[clap(long, default_value = "1")]
|
||||
queue_depth: NonZeroUsize,
|
||||
|
||||
targets: Option<Vec<TenantTimelineId>>,
|
||||
}
|
||||
|
||||
@@ -302,7 +298,6 @@ async fn main_impl(
|
||||
start_work_barrier.wait().await;
|
||||
let client_start = Instant::now();
|
||||
let mut ticks_processed = 0;
|
||||
let mut inflight = VecDeque::new();
|
||||
while !cancel.is_cancelled() {
|
||||
// Detect if a request took longer than the RPS rate
|
||||
if let Some(period) = &rps_period {
|
||||
@@ -316,37 +311,31 @@ async fn main_impl(
|
||||
ticks_processed = periods_passed_until_now;
|
||||
}
|
||||
|
||||
while inflight.len() < args.queue_depth.get() {
|
||||
let start = Instant::now();
|
||||
let req = {
|
||||
let mut rng = rand::thread_rng();
|
||||
let r = &ranges[weights.sample(&mut rng)];
|
||||
let key: i128 = rng.gen_range(r.start..r.end);
|
||||
let key = Key::from_i128(key);
|
||||
assert!(key.is_rel_block_key());
|
||||
let (rel_tag, block_no) = key
|
||||
.to_rel_block()
|
||||
.expect("we filter non-rel-block keys out above");
|
||||
PagestreamGetPageRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: if rng.gen_bool(args.req_latest_probability) {
|
||||
Lsn::MAX
|
||||
} else {
|
||||
r.timeline_lsn
|
||||
},
|
||||
not_modified_since: r.timeline_lsn,
|
||||
let start = Instant::now();
|
||||
let req = {
|
||||
let mut rng = rand::thread_rng();
|
||||
let r = &ranges[weights.sample(&mut rng)];
|
||||
let key: i128 = rng.gen_range(r.start..r.end);
|
||||
let key = Key::from_i128(key);
|
||||
assert!(key.is_rel_block_key());
|
||||
let (rel_tag, block_no) = key
|
||||
.to_rel_block()
|
||||
.expect("we filter non-rel-block keys out above");
|
||||
PagestreamGetPageRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: if rng.gen_bool(args.req_latest_probability) {
|
||||
Lsn::MAX
|
||||
} else {
|
||||
r.timeline_lsn
|
||||
},
|
||||
rel: rel_tag,
|
||||
blkno: block_no,
|
||||
}
|
||||
};
|
||||
client.getpage_send(req).await.unwrap();
|
||||
inflight.push_back(start);
|
||||
}
|
||||
|
||||
let start = inflight.pop_front().unwrap();
|
||||
client.getpage_recv().await.unwrap();
|
||||
not_modified_since: r.timeline_lsn,
|
||||
},
|
||||
rel: rel_tag,
|
||||
blkno: block_no,
|
||||
}
|
||||
};
|
||||
client.getpage(req).await.unwrap();
|
||||
let end = Instant::now();
|
||||
live_stats.request_done();
|
||||
ticks_processed += 1;
|
||||
|
||||
@@ -25,7 +25,6 @@ use tokio_tar::{Builder, EntryType, Header};
|
||||
|
||||
use crate::context::RequestContext;
|
||||
use crate::pgdatadir_mapping::Version;
|
||||
use crate::tenant::storage_layer::IoConcurrency;
|
||||
use crate::tenant::Timeline;
|
||||
use pageserver_api::reltag::{RelTag, SlruKind};
|
||||
|
||||
@@ -124,13 +123,6 @@ where
|
||||
full_backup,
|
||||
replica,
|
||||
ctx,
|
||||
io_concurrency: IoConcurrency::spawn_from_conf(
|
||||
timeline.conf,
|
||||
timeline
|
||||
.gate
|
||||
.enter()
|
||||
.map_err(|e| BasebackupError::Server(e.into()))?,
|
||||
),
|
||||
};
|
||||
basebackup
|
||||
.send_tarball()
|
||||
@@ -152,7 +144,6 @@ where
|
||||
full_backup: bool,
|
||||
replica: bool,
|
||||
ctx: &'a RequestContext,
|
||||
io_concurrency: IoConcurrency,
|
||||
}
|
||||
|
||||
/// A sink that accepts SLRU blocks ordered by key and forwards
|
||||
@@ -312,7 +303,7 @@ where
|
||||
for part in slru_partitions.parts {
|
||||
let blocks = self
|
||||
.timeline
|
||||
.get_vectored(part, self.lsn, self.io_concurrency.clone(), self.ctx)
|
||||
.get_vectored(part, self.lsn, self.ctx)
|
||||
.await
|
||||
.map_err(|e| BasebackupError::Server(e.into()))?;
|
||||
|
||||
@@ -367,7 +358,7 @@ where
|
||||
let start_time = Instant::now();
|
||||
let aux_files = self
|
||||
.timeline
|
||||
.list_aux_files(self.lsn, self.ctx, self.io_concurrency.clone())
|
||||
.list_aux_files(self.lsn, self.ctx)
|
||||
.await
|
||||
.map_err(|e| BasebackupError::Server(e.into()))?;
|
||||
let aux_scan_time = start_time.elapsed();
|
||||
@@ -431,7 +422,7 @@ where
|
||||
}
|
||||
let repl_origins = self
|
||||
.timeline
|
||||
.get_replorigins(self.lsn, self.ctx, self.io_concurrency.clone())
|
||||
.get_replorigins(self.lsn, self.ctx)
|
||||
.await
|
||||
.map_err(|e| BasebackupError::Server(e.into()))?;
|
||||
let n_origins = repl_origins.len();
|
||||
@@ -498,13 +489,7 @@ where
|
||||
for blknum in startblk..endblk {
|
||||
let img = self
|
||||
.timeline
|
||||
.get_rel_page_at_lsn(
|
||||
src,
|
||||
blknum,
|
||||
Version::Lsn(self.lsn),
|
||||
self.ctx,
|
||||
self.io_concurrency.clone(),
|
||||
)
|
||||
.get_rel_page_at_lsn(src, blknum, Version::Lsn(self.lsn), self.ctx)
|
||||
.await
|
||||
.map_err(|e| BasebackupError::Server(e.into()))?;
|
||||
segment_data.extend_from_slice(&img[..]);
|
||||
|
||||
@@ -135,7 +135,6 @@ fn main() -> anyhow::Result<()> {
|
||||
info!(?conf.virtual_file_io_mode, "starting with virtual_file IO mode");
|
||||
info!(?conf.wal_receiver_protocol, "starting with WAL receiver protocol");
|
||||
info!(?conf.page_service_pipelining, "starting with page service pipelining config");
|
||||
info!(?conf.get_vectored_concurrent_io, "starting with get_vectored IO concurrency config");
|
||||
|
||||
// The tenants directory contains all the pageserver local disk state.
|
||||
// Create if not exists and make sure all the contents are durable before proceeding.
|
||||
|
||||
@@ -1,65 +0,0 @@
|
||||
use std::{
|
||||
io::{stdin, stdout, Read, Write},
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use clap::Parser;
|
||||
use pageserver_api::models::{PagestreamRequest, PagestreamTestRequest};
|
||||
use utils::{
|
||||
id::{TenantId, TimelineId},
|
||||
lsn::Lsn,
|
||||
};
|
||||
|
||||
#[derive(clap::Parser)]
|
||||
struct Args {
|
||||
connstr: String,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let Args {
|
||||
connstr,
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
} = Args::parse();
|
||||
let client = pageserver_client::page_service::Client::new(connstr).await?;
|
||||
let client = client.pagestream(tenant_id, timeline_id).await?;
|
||||
let (mut sender, _receiver) = client.split();
|
||||
|
||||
eprintln!("filling the pipe");
|
||||
let mut msg = 0;
|
||||
loop {
|
||||
msg += 1;
|
||||
let fut = sender.send(pageserver_api::models::PagestreamFeMessage::Test(
|
||||
PagestreamTestRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(23),
|
||||
not_modified_since: Lsn(23),
|
||||
},
|
||||
batch_key: 42,
|
||||
message: format!("message {}", msg),
|
||||
},
|
||||
));
|
||||
let Ok(res) = tokio::time::timeout(Duration::from_secs(10), fut).await else {
|
||||
eprintln!("pipe seems full");
|
||||
break;
|
||||
};
|
||||
let _: () = res?;
|
||||
}
|
||||
|
||||
let n = stdout().write(b"R")?;
|
||||
assert_eq!(n, 1);
|
||||
stdout().flush()?;
|
||||
|
||||
eprintln!("waiting for signal to tell us to exit");
|
||||
|
||||
let mut buf = [0u8; 1];
|
||||
stdin().read_exact(&mut buf)?;
|
||||
|
||||
eprintln!("termination signal received, exiting");
|
||||
|
||||
anyhow::Ok(())
|
||||
}
|
||||
@@ -109,13 +109,13 @@ pub struct PageServerConf {
|
||||
/// A lower value implicitly deprioritizes loading such tenants, vs. other work in the system.
|
||||
pub concurrent_tenant_warmup: ConfigurableSemaphore,
|
||||
|
||||
/// Number of concurrent [`Tenant::gather_size_inputs`](crate::tenant::Tenant::gather_size_inputs) allowed.
|
||||
/// Number of concurrent [`TenantShard::gather_size_inputs`](crate::tenant::TenantShard::gather_size_inputs) allowed.
|
||||
pub concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore,
|
||||
/// Limit of concurrent [`Tenant::gather_size_inputs`] issued by module `eviction_task`.
|
||||
/// Limit of concurrent [`TenantShard::gather_size_inputs`] issued by module `eviction_task`.
|
||||
/// The number of permits is the same as `concurrent_tenant_size_logical_size_queries`.
|
||||
/// See the comment in `eviction_task` for details.
|
||||
///
|
||||
/// [`Tenant::gather_size_inputs`]: crate::tenant::Tenant::gather_size_inputs
|
||||
/// [`TenantShard::gather_size_inputs`]: crate::tenant::TenantShard::gather_size_inputs
|
||||
pub eviction_task_immitated_concurrent_logical_size_queries: ConfigurableSemaphore,
|
||||
|
||||
// How often to collect metrics and send them to the metrics endpoint.
|
||||
@@ -191,8 +191,6 @@ pub struct PageServerConf {
|
||||
pub wal_receiver_protocol: PostgresClientProtocol,
|
||||
|
||||
pub page_service_pipelining: pageserver_api::config::PageServicePipeliningConfig,
|
||||
|
||||
pub get_vectored_concurrent_io: pageserver_api::config::GetVectoredConcurrentIo,
|
||||
}
|
||||
|
||||
/// Token for authentication to safekeepers
|
||||
@@ -354,7 +352,6 @@ impl PageServerConf {
|
||||
no_sync,
|
||||
wal_receiver_protocol,
|
||||
page_service_pipelining,
|
||||
get_vectored_concurrent_io,
|
||||
} = config_toml;
|
||||
|
||||
let mut conf = PageServerConf {
|
||||
@@ -399,7 +396,6 @@ impl PageServerConf {
|
||||
import_pgdata_aws_endpoint_url,
|
||||
wal_receiver_protocol,
|
||||
page_service_pipelining,
|
||||
get_vectored_concurrent_io,
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// fields that require additional validation or custom handling
|
||||
@@ -513,10 +509,10 @@ impl ConfigurableSemaphore {
|
||||
/// Initializse using a non-zero amount of permits.
|
||||
///
|
||||
/// Require a non-zero initial permits, because using permits == 0 is a crude way to disable a
|
||||
/// feature such as [`Tenant::gather_size_inputs`]. Otherwise any semaphore using future will
|
||||
/// feature such as [`TenantShard::gather_size_inputs`]. Otherwise any semaphore using future will
|
||||
/// behave like [`futures::future::pending`], just waiting until new permits are added.
|
||||
///
|
||||
/// [`Tenant::gather_size_inputs`]: crate::tenant::Tenant::gather_size_inputs
|
||||
/// [`TenantShard::gather_size_inputs`]: crate::tenant::TenantShard::gather_size_inputs
|
||||
pub fn new(initial_permits: NonZeroUsize) -> Self {
|
||||
ConfigurableSemaphore {
|
||||
initial_permits,
|
||||
|
||||
@@ -7,7 +7,7 @@ use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME};
|
||||
use crate::tenant::size::CalculateSyntheticSizeError;
|
||||
use crate::tenant::tasks::BackgroundLoopKind;
|
||||
use crate::tenant::{mgr::TenantManager, LogicalSizeCalculationCause, Tenant};
|
||||
use crate::tenant::{mgr::TenantShardManager, LogicalSizeCalculationCause, TenantShard};
|
||||
use camino::Utf8PathBuf;
|
||||
use consumption_metrics::EventType;
|
||||
use itertools::Itertools as _;
|
||||
@@ -95,7 +95,7 @@ type Cache = HashMap<MetricsKey, NewRawMetric>;
|
||||
|
||||
pub async fn run(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
tenant_manager: Arc<TenantShardManager>,
|
||||
cancel: CancellationToken,
|
||||
) {
|
||||
let Some(metric_collection_endpoint) = conf.metric_collection_endpoint.as_ref() else {
|
||||
@@ -150,7 +150,7 @@ pub async fn run(
|
||||
/// Main thread that serves metrics collection
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn collect_metrics(
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
tenant_manager: Arc<TenantShardManager>,
|
||||
metric_collection_endpoint: &Url,
|
||||
metric_collection_bucket: &Option<RemoteStorageConfig>,
|
||||
metric_collection_interval: Duration,
|
||||
@@ -362,7 +362,7 @@ async fn reschedule(
|
||||
|
||||
/// Caclculate synthetic size for each active tenant
|
||||
async fn calculate_synthetic_size_worker(
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
tenant_manager: Arc<TenantShardManager>,
|
||||
synthetic_size_calculation_interval: Duration,
|
||||
cancel: CancellationToken,
|
||||
ctx: RequestContext,
|
||||
@@ -425,7 +425,7 @@ async fn calculate_synthetic_size_worker(
|
||||
}
|
||||
}
|
||||
|
||||
async fn calculate_and_log(tenant: &Tenant, cancel: &CancellationToken, ctx: &RequestContext) {
|
||||
async fn calculate_and_log(tenant: &TenantShard, cancel: &CancellationToken, ctx: &RequestContext) {
|
||||
const CAUSE: LogicalSizeCalculationCause =
|
||||
LogicalSizeCalculationCause::ConsumptionMetricsSyntheticSize;
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::tenant::mgr::TenantManager;
|
||||
use crate::tenant::mgr::TenantShardManager;
|
||||
use crate::{context::RequestContext, tenant::timeline::logical_size::CurrentLogicalSize};
|
||||
use chrono::{DateTime, Utc};
|
||||
use consumption_metrics::EventType;
|
||||
@@ -174,9 +174,9 @@ impl MetricsKey {
|
||||
.absolute_values()
|
||||
}
|
||||
|
||||
/// [`Tenant::remote_size`]
|
||||
/// [`TenantShard::remote_size`]
|
||||
///
|
||||
/// [`Tenant::remote_size`]: crate::tenant::Tenant::remote_size
|
||||
/// [`TenantShard::remote_size`]: crate::tenant::TenantShard::remote_size
|
||||
const fn remote_storage_size(tenant_id: TenantId) -> AbsoluteValueFactory {
|
||||
MetricsKey {
|
||||
tenant_id,
|
||||
@@ -198,9 +198,9 @@ impl MetricsKey {
|
||||
.absolute_values()
|
||||
}
|
||||
|
||||
/// [`Tenant::cached_synthetic_size`] as refreshed by [`calculate_synthetic_size_worker`].
|
||||
/// [`TenantShard::cached_synthetic_size`] as refreshed by [`calculate_synthetic_size_worker`].
|
||||
///
|
||||
/// [`Tenant::cached_synthetic_size`]: crate::tenant::Tenant::cached_synthetic_size
|
||||
/// [`TenantShard::cached_synthetic_size`]: crate::tenant::TenantShard::cached_synthetic_size
|
||||
/// [`calculate_synthetic_size_worker`]: super::calculate_synthetic_size_worker
|
||||
const fn synthetic_size(tenant_id: TenantId) -> AbsoluteValueFactory {
|
||||
MetricsKey {
|
||||
@@ -213,7 +213,7 @@ impl MetricsKey {
|
||||
}
|
||||
|
||||
pub(super) async fn collect_all_metrics(
|
||||
tenant_manager: &Arc<TenantManager>,
|
||||
tenant_manager: &Arc<TenantShardManager>,
|
||||
cached_metrics: &Cache,
|
||||
ctx: &RequestContext,
|
||||
) -> Vec<NewRawMetric> {
|
||||
@@ -253,7 +253,7 @@ pub(super) async fn collect_all_metrics(
|
||||
|
||||
async fn collect<S>(tenants: S, cache: &Cache, ctx: &RequestContext) -> Vec<NewRawMetric>
|
||||
where
|
||||
S: futures::stream::Stream<Item = (TenantId, Arc<crate::tenant::Tenant>)>,
|
||||
S: futures::stream::Stream<Item = (TenantId, Arc<crate::tenant::TenantShard>)>,
|
||||
{
|
||||
let mut current_metrics: Vec<NewRawMetric> = Vec::new();
|
||||
|
||||
@@ -307,7 +307,7 @@ impl TenantSnapshot {
|
||||
///
|
||||
/// `resident_size` is calculated of the timelines we had access to for other metrics, so we
|
||||
/// cannot just list timelines here.
|
||||
fn collect(t: &Arc<crate::tenant::Tenant>, resident_size: u64) -> Self {
|
||||
fn collect(t: &Arc<crate::tenant::TenantShard>, resident_size: u64) -> Self {
|
||||
TenantSnapshot {
|
||||
resident_size,
|
||||
remote_size: t.remote_size(),
|
||||
|
||||
@@ -703,7 +703,7 @@ mod test {
|
||||
|
||||
use crate::{
|
||||
controller_upcall_client::RetryForeverError,
|
||||
tenant::{harness::TenantHarness, storage_layer::DeltaLayerName},
|
||||
tenant::{harness::TenantShardHarness, storage_layer::DeltaLayerName},
|
||||
};
|
||||
|
||||
use super::*;
|
||||
@@ -722,7 +722,7 @@ mod test {
|
||||
});
|
||||
|
||||
struct TestSetup {
|
||||
harness: TenantHarness,
|
||||
harness: TenantShardHarness,
|
||||
remote_fs_dir: Utf8PathBuf,
|
||||
storage: GenericRemoteStorage,
|
||||
mock_control_plane: MockControlPlane,
|
||||
@@ -825,7 +825,7 @@ mod test {
|
||||
|
||||
async fn setup(test_name: &str) -> anyhow::Result<TestSetup> {
|
||||
let test_name = Box::leak(Box::new(format!("deletion_queue__{test_name}")));
|
||||
let harness = TenantHarness::create(test_name).await?;
|
||||
let harness = TenantShardHarness::create(test_name).await?;
|
||||
|
||||
// We do not load() the harness: we only need its config and remote_storage
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@ use crate::{
|
||||
metrics::disk_usage_based_eviction::METRICS,
|
||||
task_mgr::{self, BACKGROUND_RUNTIME},
|
||||
tenant::{
|
||||
mgr::TenantManager,
|
||||
mgr::TenantShardManager,
|
||||
remote_timeline_client::LayerFileMetadata,
|
||||
secondary::SecondaryTenant,
|
||||
storage_layer::{AsLayerDesc, EvictionError, Layer, LayerName, LayerVisibilityHint},
|
||||
@@ -166,7 +166,7 @@ pub fn launch_disk_usage_global_eviction_task(
|
||||
conf: &'static PageServerConf,
|
||||
storage: GenericRemoteStorage,
|
||||
state: Arc<State>,
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
tenant_manager: Arc<TenantShardManager>,
|
||||
background_jobs_barrier: completion::Barrier,
|
||||
) -> Option<DiskUsageEvictionTask> {
|
||||
let Some(task_config) = &conf.disk_usage_based_eviction else {
|
||||
@@ -203,7 +203,7 @@ async fn disk_usage_eviction_task(
|
||||
state: &State,
|
||||
task_config: &DiskUsageEvictionTaskConfig,
|
||||
storage: &GenericRemoteStorage,
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
tenant_manager: Arc<TenantShardManager>,
|
||||
cancel: CancellationToken,
|
||||
) {
|
||||
scopeguard::defer! {
|
||||
@@ -265,7 +265,7 @@ async fn disk_usage_eviction_task_iteration(
|
||||
state: &State,
|
||||
task_config: &DiskUsageEvictionTaskConfig,
|
||||
storage: &GenericRemoteStorage,
|
||||
tenant_manager: &Arc<TenantManager>,
|
||||
tenant_manager: &Arc<TenantShardManager>,
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<()> {
|
||||
let tenants_dir = tenant_manager.get_conf().tenants_path();
|
||||
@@ -361,7 +361,7 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
|
||||
state: &State,
|
||||
_storage: &GenericRemoteStorage,
|
||||
usage_pre: U,
|
||||
tenant_manager: &Arc<TenantManager>,
|
||||
tenant_manager: &Arc<TenantShardManager>,
|
||||
eviction_order: EvictionOrder,
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<IterationOutcome<U>> {
|
||||
@@ -788,7 +788,7 @@ enum EvictionCandidates {
|
||||
/// - tenant B 1 layer
|
||||
/// - tenant C 8 layers
|
||||
async fn collect_eviction_candidates(
|
||||
tenant_manager: &Arc<TenantManager>,
|
||||
tenant_manager: &Arc<TenantShardManager>,
|
||||
eviction_order: EvictionOrder,
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<EvictionCandidates> {
|
||||
|
||||
@@ -74,7 +74,7 @@ use crate::task_mgr::TaskKind;
|
||||
use crate::tenant::config::{LocationConf, TenantConfOpt};
|
||||
use crate::tenant::mgr::GetActiveTenantError;
|
||||
use crate::tenant::mgr::{
|
||||
GetTenantError, TenantManager, TenantMapError, TenantMapInsertError, TenantSlotError,
|
||||
GetTenantError, TenantMapError, TenantMapInsertError, TenantShardManager, TenantSlotError,
|
||||
TenantSlotUpsertError, TenantStateError,
|
||||
};
|
||||
use crate::tenant::mgr::{TenantSlot, UpsertLocationError};
|
||||
@@ -84,7 +84,6 @@ use crate::tenant::remote_timeline_client::list_remote_tenant_shards;
|
||||
use crate::tenant::remote_timeline_client::list_remote_timelines;
|
||||
use crate::tenant::secondary::SecondaryController;
|
||||
use crate::tenant::size::ModelInputs;
|
||||
use crate::tenant::storage_layer::IoConcurrency;
|
||||
use crate::tenant::storage_layer::LayerAccessStatsReset;
|
||||
use crate::tenant::storage_layer::LayerName;
|
||||
use crate::tenant::timeline::import_pgdata;
|
||||
@@ -131,7 +130,7 @@ pub(crate) const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(30000);
|
||||
|
||||
pub struct State {
|
||||
conf: &'static PageServerConf,
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
tenant_manager: Arc<TenantShardManager>,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
allowlist_routes: &'static [&'static str],
|
||||
remote_storage: GenericRemoteStorage,
|
||||
@@ -146,7 +145,7 @@ impl State {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn new(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
tenant_manager: Arc<TenantShardManager>,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
@@ -1762,7 +1761,7 @@ async fn update_tenant_config_handler(
|
||||
&ShardParameters::default(),
|
||||
);
|
||||
|
||||
crate::tenant::Tenant::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
|
||||
crate::tenant::TenantShard::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
|
||||
.await
|
||||
.map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;
|
||||
|
||||
@@ -1803,7 +1802,7 @@ async fn patch_tenant_config_handler(
|
||||
&ShardParameters::default(),
|
||||
);
|
||||
|
||||
crate::tenant::Tenant::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
|
||||
crate::tenant::TenantShard::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
|
||||
.await
|
||||
.map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;
|
||||
|
||||
@@ -2572,7 +2571,7 @@ async fn timeline_collect_keyspace(
|
||||
}
|
||||
|
||||
async fn active_timeline_of_active_tenant(
|
||||
tenant_manager: &TenantManager,
|
||||
tenant_manager: &TenantShardManager,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
) -> Result<Arc<Timeline>, ApiError> {
|
||||
@@ -2939,15 +2938,8 @@ async fn list_aux_files(
|
||||
active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)
|
||||
.await?;
|
||||
|
||||
let io_concurrency = IoConcurrency::spawn_from_conf(
|
||||
state.conf,
|
||||
timeline.gate.enter().map_err(|_| ApiError::Cancelled)?,
|
||||
);
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
let files = timeline
|
||||
.list_aux_files(body.lsn, &ctx, io_concurrency)
|
||||
.await?;
|
||||
let files = timeline.list_aux_files(body.lsn, &ctx).await?;
|
||||
json_response(StatusCode::OK, files)
|
||||
}
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ pub mod walredo;
|
||||
use camino::Utf8Path;
|
||||
use deletion_queue::DeletionQueue;
|
||||
use tenant::{
|
||||
mgr::{BackgroundPurges, TenantManager},
|
||||
mgr::{BackgroundPurges, TenantShardManager},
|
||||
secondary,
|
||||
};
|
||||
use tracing::{info, info_span};
|
||||
@@ -81,7 +81,7 @@ pub async fn shutdown_pageserver(
|
||||
page_service: page_service::Listener,
|
||||
consumption_metrics_worker: ConsumptionMetricsTasks,
|
||||
disk_usage_eviction_task: Option<DiskUsageEvictionTask>,
|
||||
tenant_manager: &TenantManager,
|
||||
tenant_manager: &TenantShardManager,
|
||||
background_purges: BackgroundPurges,
|
||||
mut deletion_queue: DeletionQueue,
|
||||
secondary_controller_tasks: secondary::GlobalTasks,
|
||||
|
||||
@@ -3,7 +3,7 @@ use metrics::{
|
||||
register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
|
||||
register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
|
||||
register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
|
||||
Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
|
||||
Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
|
||||
IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
|
||||
};
|
||||
use once_cell::sync::Lazy;
|
||||
@@ -38,9 +38,6 @@ pub(crate) enum StorageTimeOperation {
|
||||
#[strum(serialize = "layer flush")]
|
||||
LayerFlush,
|
||||
|
||||
#[strum(serialize = "layer flush delay")]
|
||||
LayerFlushDelay,
|
||||
|
||||
#[strum(serialize = "compact")]
|
||||
Compact,
|
||||
|
||||
@@ -103,30 +100,71 @@ pub(crate) static VEC_READ_NUM_LAYERS_VISITED: Lazy<Histogram> = Lazy::new(|| {
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub(crate) static CONCURRENT_INITDBS: Lazy<UIntGauge> = Lazy::new(|| {
|
||||
register_uint_gauge!(
|
||||
"pageserver_concurrent_initdb",
|
||||
"Number of initdb processes running"
|
||||
// Metrics collected on operations on the storage repository.
|
||||
#[derive(
|
||||
Clone, Copy, enum_map::Enum, strum_macros::EnumString, strum_macros::Display, IntoStaticStr,
|
||||
)]
|
||||
pub(crate) enum GetKind {
|
||||
Singular,
|
||||
Vectored,
|
||||
}
|
||||
|
||||
pub(crate) struct ReconstructTimeMetrics {
|
||||
singular: Histogram,
|
||||
vectored: Histogram,
|
||||
}
|
||||
|
||||
pub(crate) static RECONSTRUCT_TIME: Lazy<ReconstructTimeMetrics> = Lazy::new(|| {
|
||||
let inner = register_histogram_vec!(
|
||||
"pageserver_getpage_reconstruct_seconds",
|
||||
"Time spent in reconstruct_value (reconstruct a page from deltas)",
|
||||
&["get_kind"],
|
||||
CRITICAL_OP_BUCKETS.into(),
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
.expect("failed to define a metric");
|
||||
|
||||
ReconstructTimeMetrics {
|
||||
singular: inner.with_label_values(&[GetKind::Singular.into()]),
|
||||
vectored: inner.with_label_values(&[GetKind::Vectored.into()]),
|
||||
}
|
||||
});
|
||||
|
||||
pub(crate) static INITDB_SEMAPHORE_ACQUISITION_TIME: Lazy<Histogram> = Lazy::new(|| {
|
||||
register_histogram!(
|
||||
"pageserver_initdb_semaphore_seconds_global",
|
||||
"Time spent getting a permit from the global initdb semaphore",
|
||||
STORAGE_OP_BUCKETS.into()
|
||||
)
|
||||
.expect("failed to define metric")
|
||||
});
|
||||
impl ReconstructTimeMetrics {
|
||||
pub(crate) fn for_get_kind(&self, get_kind: GetKind) -> &Histogram {
|
||||
match get_kind {
|
||||
GetKind::Singular => &self.singular,
|
||||
GetKind::Vectored => &self.vectored,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) static INITDB_RUN_TIME: Lazy<Histogram> = Lazy::new(|| {
|
||||
register_histogram!(
|
||||
"pageserver_initdb_seconds_global",
|
||||
"Time spent performing initdb",
|
||||
STORAGE_OP_BUCKETS.into()
|
||||
pub(crate) struct ReconstructDataTimeMetrics {
|
||||
singular: Histogram,
|
||||
vectored: Histogram,
|
||||
}
|
||||
|
||||
impl ReconstructDataTimeMetrics {
|
||||
pub(crate) fn for_get_kind(&self, get_kind: GetKind) -> &Histogram {
|
||||
match get_kind {
|
||||
GetKind::Singular => &self.singular,
|
||||
GetKind::Vectored => &self.vectored,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) static GET_RECONSTRUCT_DATA_TIME: Lazy<ReconstructDataTimeMetrics> = Lazy::new(|| {
|
||||
let inner = register_histogram_vec!(
|
||||
"pageserver_getpage_get_reconstruct_data_seconds",
|
||||
"Time spent in get_reconstruct_value_data",
|
||||
&["get_kind"],
|
||||
CRITICAL_OP_BUCKETS.into(),
|
||||
)
|
||||
.expect("failed to define metric")
|
||||
.expect("failed to define a metric");
|
||||
|
||||
ReconstructDataTimeMetrics {
|
||||
singular: inner.with_label_values(&[GetKind::Singular.into()]),
|
||||
vectored: inner.with_label_values(&[GetKind::Vectored.into()]),
|
||||
}
|
||||
});
|
||||
|
||||
pub(crate) struct GetVectoredLatency {
|
||||
@@ -398,6 +436,15 @@ pub(crate) static WAIT_LSN_TIME: Lazy<Histogram> = Lazy::new(|| {
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static FLUSH_WAIT_UPLOAD_TIME: Lazy<GaugeVec> = Lazy::new(|| {
|
||||
register_gauge_vec!(
|
||||
"pageserver_flush_wait_upload_seconds",
|
||||
"Time spent waiting for preceding uploads during layer flush",
|
||||
&["tenant_id", "shard_id", "timeline_id"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
|
||||
register_int_gauge_vec!(
|
||||
"pageserver_last_record_lsn",
|
||||
@@ -873,7 +920,7 @@ pub(crate) static TIMELINE_EPHEMERAL_BYTES: Lazy<UIntGauge> = Lazy::new(|| {
|
||||
.expect("Failed to register metric")
|
||||
});
|
||||
|
||||
/// Metrics related to the lifecycle of a [`crate::tenant::Tenant`] object: things
|
||||
/// Metrics related to the lifecycle of a [`crate::tenant::TenantShard`] object: things
|
||||
/// like how long it took to load.
|
||||
///
|
||||
/// Note that these are process-global metrics, _not_ per-tenant metrics. Per-tenant
|
||||
@@ -1416,8 +1463,6 @@ pub enum SmgrQueryType {
|
||||
GetPageAtLsn,
|
||||
GetDbSize,
|
||||
GetSlruSegment,
|
||||
#[cfg(feature = "testing")]
|
||||
Test,
|
||||
}
|
||||
|
||||
pub(crate) struct SmgrQueryTimePerTimeline {
|
||||
@@ -2477,19 +2522,12 @@ impl StorageTimeMetricsTimer {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the elapsed duration of the timer.
|
||||
pub fn elapsed(&self) -> Duration {
|
||||
self.start.elapsed()
|
||||
}
|
||||
|
||||
/// Record the time from creation to now and return it.
|
||||
pub fn stop_and_record(self) -> Duration {
|
||||
let duration = self.elapsed();
|
||||
let seconds = duration.as_secs_f64();
|
||||
self.metrics.timeline_sum.inc_by(seconds);
|
||||
/// Record the time from creation to now.
|
||||
pub fn stop_and_record(self) {
|
||||
let duration = self.start.elapsed().as_secs_f64();
|
||||
self.metrics.timeline_sum.inc_by(duration);
|
||||
self.metrics.timeline_count.inc();
|
||||
self.metrics.global_histogram.observe(seconds);
|
||||
duration
|
||||
self.metrics.global_histogram.observe(duration);
|
||||
}
|
||||
|
||||
/// Turns this timer into a timer, which will always record -- usually this means recording
|
||||
@@ -2509,13 +2547,6 @@ impl Drop for AlwaysRecordingStorageTimeMetricsTimer {
|
||||
}
|
||||
}
|
||||
|
||||
impl AlwaysRecordingStorageTimeMetricsTimer {
|
||||
/// Returns the elapsed duration of the timer.
|
||||
pub fn elapsed(&self) -> Duration {
|
||||
self.0.as_ref().expect("not dropped yet").elapsed()
|
||||
}
|
||||
}
|
||||
|
||||
/// Timing facilities for an globally histogrammed metric, which is supported by per tenant and
|
||||
/// timeline total sum and count.
|
||||
#[derive(Clone, Debug)]
|
||||
@@ -2568,7 +2599,7 @@ pub(crate) struct TimelineMetrics {
|
||||
shard_id: String,
|
||||
timeline_id: String,
|
||||
pub flush_time_histo: StorageTimeMetrics,
|
||||
pub flush_delay_histo: StorageTimeMetrics,
|
||||
pub flush_wait_upload_time_gauge: Gauge,
|
||||
pub compact_time_histo: StorageTimeMetrics,
|
||||
pub create_images_time_histo: StorageTimeMetrics,
|
||||
pub logical_size_histo: StorageTimeMetrics,
|
||||
@@ -2614,12 +2645,9 @@ impl TimelineMetrics {
|
||||
&shard_id,
|
||||
&timeline_id,
|
||||
);
|
||||
let flush_delay_histo = StorageTimeMetrics::new(
|
||||
StorageTimeOperation::LayerFlushDelay,
|
||||
&tenant_id,
|
||||
&shard_id,
|
||||
&timeline_id,
|
||||
);
|
||||
let flush_wait_upload_time_gauge = FLUSH_WAIT_UPLOAD_TIME
|
||||
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
|
||||
.unwrap();
|
||||
let compact_time_histo = StorageTimeMetrics::new(
|
||||
StorageTimeOperation::Compact,
|
||||
&tenant_id,
|
||||
@@ -2765,7 +2793,7 @@ impl TimelineMetrics {
|
||||
shard_id,
|
||||
timeline_id,
|
||||
flush_time_histo,
|
||||
flush_delay_histo,
|
||||
flush_wait_upload_time_gauge,
|
||||
compact_time_histo,
|
||||
create_images_time_histo,
|
||||
logical_size_histo,
|
||||
@@ -2815,6 +2843,14 @@ impl TimelineMetrics {
|
||||
self.resident_physical_size_gauge.get()
|
||||
}
|
||||
|
||||
pub(crate) fn flush_wait_upload_time_gauge_add(&self, duration: f64) {
|
||||
self.flush_wait_upload_time_gauge.add(duration);
|
||||
crate::metrics::FLUSH_WAIT_UPLOAD_TIME
|
||||
.get_metric_with_label_values(&[&self.tenant_id, &self.shard_id, &self.timeline_id])
|
||||
.unwrap()
|
||||
.add(duration);
|
||||
}
|
||||
|
||||
pub(crate) fn shutdown(&self) {
|
||||
let was_shutdown = self
|
||||
.shutdown
|
||||
@@ -2832,6 +2868,7 @@ impl TimelineMetrics {
|
||||
let shard_id = &self.shard_id;
|
||||
let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
let _ = DISK_CONSISTENT_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
let _ = FLUSH_WAIT_UPLOAD_TIME.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
{
|
||||
RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
|
||||
@@ -3854,6 +3891,7 @@ pub fn preinitialize_metrics(conf: &'static PageServerConf) {
|
||||
});
|
||||
|
||||
// Custom
|
||||
Lazy::force(&RECONSTRUCT_TIME);
|
||||
Lazy::force(&BASEBACKUP_QUERY_TIME);
|
||||
Lazy::force(&COMPUTE_COMMANDS_COUNTERS);
|
||||
Lazy::force(&tokio_epoll_uring::THREAD_LOCAL_METRICS_STORAGE);
|
||||
|
||||
@@ -39,7 +39,6 @@ use tokio::io::{AsyncWriteExt, BufWriter};
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
use utils::sync::gate::{Gate, GateGuard};
|
||||
use utils::sync::spsc_fold;
|
||||
use utils::{
|
||||
auth::{Claims, Scope, SwappableJwtAuth},
|
||||
@@ -60,9 +59,8 @@ use crate::span::debug_assert_current_span_has_tenant_and_timeline_id_no_shard_i
|
||||
use crate::task_mgr::TaskKind;
|
||||
use crate::task_mgr::{self, COMPUTE_REQUEST_RUNTIME};
|
||||
use crate::tenant::mgr::ShardSelector;
|
||||
use crate::tenant::mgr::TenantManager;
|
||||
use crate::tenant::mgr::TenantShardManager;
|
||||
use crate::tenant::mgr::{GetActiveTenantError, GetTenantError, ShardResolveResult};
|
||||
use crate::tenant::storage_layer::IoConcurrency;
|
||||
use crate::tenant::timeline::{self, WaitLsnError};
|
||||
use crate::tenant::GetTimelineError;
|
||||
use crate::tenant::PageReconstructError;
|
||||
@@ -92,12 +90,11 @@ pub struct Listener {
|
||||
pub struct Connections {
|
||||
cancel: CancellationToken,
|
||||
tasks: tokio::task::JoinSet<ConnectionHandlerResult>,
|
||||
gate: Gate,
|
||||
}
|
||||
|
||||
pub fn spawn(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
tenant_manager: Arc<TenantShardManager>,
|
||||
pg_auth: Option<Arc<SwappableJwtAuth>>,
|
||||
tcp_listener: tokio::net::TcpListener,
|
||||
) -> Listener {
|
||||
@@ -113,7 +110,6 @@ pub fn spawn(
|
||||
let task = COMPUTE_REQUEST_RUNTIME.spawn(task_mgr::exit_on_panic_or_error(
|
||||
"libpq listener",
|
||||
libpq_listener_main(
|
||||
conf,
|
||||
tenant_manager,
|
||||
pg_auth,
|
||||
tcp_listener,
|
||||
@@ -138,16 +134,11 @@ impl Listener {
|
||||
}
|
||||
impl Connections {
|
||||
pub(crate) async fn shutdown(self) {
|
||||
let Self {
|
||||
cancel,
|
||||
mut tasks,
|
||||
gate,
|
||||
} = self;
|
||||
let Self { cancel, mut tasks } = self;
|
||||
cancel.cancel();
|
||||
while let Some(res) = tasks.join_next().await {
|
||||
Self::handle_connection_completion(res);
|
||||
}
|
||||
gate.close().await;
|
||||
}
|
||||
|
||||
fn handle_connection_completion(res: Result<anyhow::Result<()>, tokio::task::JoinError>) {
|
||||
@@ -167,10 +158,8 @@ impl Connections {
|
||||
/// Returns Ok(()) upon cancellation via `cancel`, returning the set of
|
||||
/// open connections.
|
||||
///
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn libpq_listener_main(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
tenant_manager: Arc<TenantShardManager>,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
listener: tokio::net::TcpListener,
|
||||
auth_type: AuthType,
|
||||
@@ -179,15 +168,9 @@ pub async fn libpq_listener_main(
|
||||
listener_cancel: CancellationToken,
|
||||
) -> Connections {
|
||||
let connections_cancel = CancellationToken::new();
|
||||
let connections_gate = Gate::default();
|
||||
let mut connection_handler_tasks = tokio::task::JoinSet::default();
|
||||
|
||||
loop {
|
||||
let gate_guard = match connections_gate.enter() {
|
||||
Ok(guard) => guard,
|
||||
Err(_) => break,
|
||||
};
|
||||
|
||||
let accepted = tokio::select! {
|
||||
biased;
|
||||
_ = listener_cancel.cancelled() => break,
|
||||
@@ -207,7 +190,6 @@ pub async fn libpq_listener_main(
|
||||
let connection_ctx = listener_ctx
|
||||
.detached_child(TaskKind::PageRequestHandler, DownloadBehavior::Download);
|
||||
connection_handler_tasks.spawn(page_service_conn_main(
|
||||
conf,
|
||||
tenant_manager.clone(),
|
||||
local_auth,
|
||||
socket,
|
||||
@@ -215,7 +197,6 @@ pub async fn libpq_listener_main(
|
||||
pipelining_config.clone(),
|
||||
connection_ctx,
|
||||
connections_cancel.child_token(),
|
||||
gate_guard,
|
||||
));
|
||||
}
|
||||
Err(err) => {
|
||||
@@ -230,24 +211,20 @@ pub async fn libpq_listener_main(
|
||||
Connections {
|
||||
cancel: connections_cancel,
|
||||
tasks: connection_handler_tasks,
|
||||
gate: connections_gate,
|
||||
}
|
||||
}
|
||||
|
||||
type ConnectionHandlerResult = anyhow::Result<()>;
|
||||
|
||||
#[instrument(skip_all, fields(peer_addr))]
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn page_service_conn_main(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
tenant_manager: Arc<TenantShardManager>,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
socket: tokio::net::TcpStream,
|
||||
auth_type: AuthType,
|
||||
pipelining_config: PageServicePipeliningConfig,
|
||||
connection_ctx: RequestContext,
|
||||
cancel: CancellationToken,
|
||||
gate_guard: GateGuard,
|
||||
) -> ConnectionHandlerResult {
|
||||
let _guard = LIVE_CONNECTIONS
|
||||
.with_label_values(&["page_service"])
|
||||
@@ -297,13 +274,11 @@ async fn page_service_conn_main(
|
||||
// But it's in a shared crate, so, we store connection_ctx inside PageServerHandler
|
||||
// and create the per-query context in process_query ourselves.
|
||||
let mut conn_handler = PageServerHandler::new(
|
||||
conf,
|
||||
tenant_manager,
|
||||
auth,
|
||||
pipelining_config,
|
||||
connection_ctx,
|
||||
cancel.clone(),
|
||||
gate_guard,
|
||||
);
|
||||
let pgbackend = PostgresBackend::new_from_io(socket, peer_addr, auth_type, None)?;
|
||||
|
||||
@@ -335,7 +310,6 @@ async fn page_service_conn_main(
|
||||
}
|
||||
|
||||
struct PageServerHandler {
|
||||
conf: &'static PageServerConf,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
claims: Option<Claims>,
|
||||
|
||||
@@ -351,8 +325,6 @@ struct PageServerHandler {
|
||||
timeline_handles: Option<TimelineHandles>,
|
||||
|
||||
pipelining_config: PageServicePipeliningConfig,
|
||||
|
||||
gate_guard: GateGuard,
|
||||
}
|
||||
|
||||
struct TimelineHandles {
|
||||
@@ -365,7 +337,7 @@ struct TimelineHandles {
|
||||
}
|
||||
|
||||
impl TimelineHandles {
|
||||
fn new(tenant_manager: Arc<TenantManager>) -> Self {
|
||||
fn new(tenant_manager: Arc<TenantShardManager>) -> Self {
|
||||
Self {
|
||||
wrapper: TenantManagerWrapper {
|
||||
tenant_manager,
|
||||
@@ -407,7 +379,7 @@ impl TimelineHandles {
|
||||
}
|
||||
|
||||
pub(crate) struct TenantManagerWrapper {
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
tenant_manager: Arc<TenantShardManager>,
|
||||
// We do not support switching tenant_id on a connection at this point.
|
||||
// We can can add support for this later if needed without changing
|
||||
// the protocol.
|
||||
@@ -583,52 +555,37 @@ struct BatchedGetPageRequest {
|
||||
timer: SmgrOpTimer,
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
struct BatchedTestRequest {
|
||||
req: models::PagestreamTestRequest,
|
||||
timer: SmgrOpTimer,
|
||||
}
|
||||
|
||||
/// NB: we only hold [`timeline::handle::WeakHandle`] inside this enum,
|
||||
/// so that we don't keep the [`Timeline::gate`] open while the batch
|
||||
/// is being built up inside the [`spsc_fold`] (pagestream pipelining).
|
||||
enum BatchedFeMessage {
|
||||
Exists {
|
||||
span: Span,
|
||||
timer: SmgrOpTimer,
|
||||
shard: timeline::handle::WeakHandle<TenantManagerTypes>,
|
||||
shard: timeline::handle::Handle<TenantManagerTypes>,
|
||||
req: models::PagestreamExistsRequest,
|
||||
},
|
||||
Nblocks {
|
||||
span: Span,
|
||||
timer: SmgrOpTimer,
|
||||
shard: timeline::handle::WeakHandle<TenantManagerTypes>,
|
||||
shard: timeline::handle::Handle<TenantManagerTypes>,
|
||||
req: models::PagestreamNblocksRequest,
|
||||
},
|
||||
GetPage {
|
||||
span: Span,
|
||||
shard: timeline::handle::WeakHandle<TenantManagerTypes>,
|
||||
shard: timeline::handle::Handle<TenantManagerTypes>,
|
||||
effective_request_lsn: Lsn,
|
||||
pages: smallvec::SmallVec<[BatchedGetPageRequest; 1]>,
|
||||
},
|
||||
DbSize {
|
||||
span: Span,
|
||||
timer: SmgrOpTimer,
|
||||
shard: timeline::handle::WeakHandle<TenantManagerTypes>,
|
||||
shard: timeline::handle::Handle<TenantManagerTypes>,
|
||||
req: models::PagestreamDbSizeRequest,
|
||||
},
|
||||
GetSlruSegment {
|
||||
span: Span,
|
||||
timer: SmgrOpTimer,
|
||||
shard: timeline::handle::WeakHandle<TenantManagerTypes>,
|
||||
shard: timeline::handle::Handle<TenantManagerTypes>,
|
||||
req: models::PagestreamGetSlruSegmentRequest,
|
||||
},
|
||||
#[cfg(feature = "testing")]
|
||||
Test {
|
||||
span: Span,
|
||||
shard: timeline::handle::WeakHandle<TenantManagerTypes>,
|
||||
requests: Vec<BatchedTestRequest>,
|
||||
},
|
||||
RespondError {
|
||||
span: Span,
|
||||
error: BatchedPageStreamError,
|
||||
@@ -649,12 +606,6 @@ impl BatchedFeMessage {
|
||||
page.timer.observe_execution_start(at);
|
||||
}
|
||||
}
|
||||
#[cfg(feature = "testing")]
|
||||
BatchedFeMessage::Test { requests, .. } => {
|
||||
for req in requests {
|
||||
req.timer.observe_execution_start(at);
|
||||
}
|
||||
}
|
||||
BatchedFeMessage::RespondError { .. } => {}
|
||||
}
|
||||
}
|
||||
@@ -662,23 +613,19 @@ impl BatchedFeMessage {
|
||||
|
||||
impl PageServerHandler {
|
||||
pub fn new(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
tenant_manager: Arc<TenantShardManager>,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
pipelining_config: PageServicePipeliningConfig,
|
||||
connection_ctx: RequestContext,
|
||||
cancel: CancellationToken,
|
||||
gate_guard: GateGuard,
|
||||
) -> Self {
|
||||
PageServerHandler {
|
||||
conf,
|
||||
auth,
|
||||
claims: None,
|
||||
connection_ctx,
|
||||
timeline_handles: Some(TimelineHandles::new(tenant_manager)),
|
||||
cancel,
|
||||
pipelining_config,
|
||||
gate_guard,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -788,7 +735,7 @@ impl PageServerHandler {
|
||||
BatchedFeMessage::Exists {
|
||||
span,
|
||||
timer,
|
||||
shard: shard.downgrade(),
|
||||
shard,
|
||||
req,
|
||||
}
|
||||
}
|
||||
@@ -807,7 +754,7 @@ impl PageServerHandler {
|
||||
BatchedFeMessage::Nblocks {
|
||||
span,
|
||||
timer,
|
||||
shard: shard.downgrade(),
|
||||
shard,
|
||||
req,
|
||||
}
|
||||
}
|
||||
@@ -826,7 +773,7 @@ impl PageServerHandler {
|
||||
BatchedFeMessage::DbSize {
|
||||
span,
|
||||
timer,
|
||||
shard: shard.downgrade(),
|
||||
shard,
|
||||
req,
|
||||
}
|
||||
}
|
||||
@@ -845,7 +792,7 @@ impl PageServerHandler {
|
||||
BatchedFeMessage::GetSlruSegment {
|
||||
span,
|
||||
timer,
|
||||
shard: shard.downgrade(),
|
||||
shard,
|
||||
req,
|
||||
}
|
||||
}
|
||||
@@ -897,7 +844,6 @@ impl PageServerHandler {
|
||||
)
|
||||
.await?;
|
||||
|
||||
// We're holding the Handle
|
||||
let effective_request_lsn = match Self::wait_or_get_last_lsn(
|
||||
&shard,
|
||||
req.hdr.request_lsn,
|
||||
@@ -915,27 +861,11 @@ impl PageServerHandler {
|
||||
};
|
||||
BatchedFeMessage::GetPage {
|
||||
span,
|
||||
shard: shard.downgrade(),
|
||||
shard,
|
||||
effective_request_lsn,
|
||||
pages: smallvec::smallvec![BatchedGetPageRequest { req, timer }],
|
||||
}
|
||||
}
|
||||
#[cfg(feature = "testing")]
|
||||
PagestreamFeMessage::Test(req) => {
|
||||
let span = tracing::info_span!(parent: parent_span, "handle_test_request");
|
||||
let shard = timeline_handles
|
||||
.get(tenant_id, timeline_id, ShardSelector::Zero)
|
||||
.instrument(span.clone()) // sets `shard_id` field
|
||||
.await?;
|
||||
let timer =
|
||||
record_op_start_and_throttle(&shard, metrics::SmgrQueryType::Test, received_at)
|
||||
.await?;
|
||||
BatchedFeMessage::Test {
|
||||
span,
|
||||
shard: shard.downgrade(),
|
||||
requests: vec![BatchedTestRequest { req, timer }],
|
||||
}
|
||||
}
|
||||
};
|
||||
Ok(Some(batched_msg))
|
||||
}
|
||||
@@ -977,7 +907,9 @@ impl PageServerHandler {
|
||||
assert_eq!(accum_pages.len(), max_batch_size.get());
|
||||
return false;
|
||||
}
|
||||
if !accum_shard.is_same_handle_as(&this_shard) {
|
||||
if (accum_shard.tenant_shard_id, accum_shard.timeline_id)
|
||||
!= (this_shard.tenant_shard_id, this_shard.timeline_id)
|
||||
{
|
||||
trace!(%accum_lsn, %this_lsn, "stopping batching because timeline object mismatch");
|
||||
// TODO: we _could_ batch & execute each shard seperately (and in parallel).
|
||||
// But the current logic for keeping responses in order does not support that.
|
||||
@@ -996,44 +928,6 @@ impl PageServerHandler {
|
||||
accum_pages.extend(this_pages);
|
||||
Ok(())
|
||||
}
|
||||
#[cfg(feature = "testing")]
|
||||
(
|
||||
Ok(BatchedFeMessage::Test {
|
||||
shard: accum_shard,
|
||||
requests: accum_requests,
|
||||
..
|
||||
}),
|
||||
BatchedFeMessage::Test {
|
||||
shard: this_shard,
|
||||
requests: this_requests,
|
||||
..
|
||||
},
|
||||
) if (|| {
|
||||
assert!(this_requests.len() == 1);
|
||||
if accum_requests.len() >= max_batch_size.get() {
|
||||
trace!(%max_batch_size, "stopping batching because of batch size");
|
||||
assert_eq!(accum_requests.len(), max_batch_size.get());
|
||||
return false;
|
||||
}
|
||||
if !accum_shard.is_same_handle_as(&this_shard) {
|
||||
trace!("stopping batching because timeline object mismatch");
|
||||
// TODO: we _could_ batch & execute each shard seperately (and in parallel).
|
||||
// But the current logic for keeping responses in order does not support that.
|
||||
return false;
|
||||
}
|
||||
let this_batch_key = this_requests[0].req.batch_key;
|
||||
let accum_batch_key = accum_requests[0].req.batch_key;
|
||||
if this_requests[0].req.batch_key != accum_requests[0].req.batch_key {
|
||||
trace!(%accum_batch_key, %this_batch_key, "stopping batching because batch key changed");
|
||||
return false;
|
||||
}
|
||||
true
|
||||
})() =>
|
||||
{
|
||||
// ok to batch
|
||||
accum_requests.extend(this_requests);
|
||||
Ok(())
|
||||
}
|
||||
// something batched already but this message is unbatchable
|
||||
(_, this_msg) => {
|
||||
// by default, don't continue batching
|
||||
@@ -1047,7 +941,6 @@ impl PageServerHandler {
|
||||
&mut self,
|
||||
pgb_writer: &mut PostgresBackend<IO>,
|
||||
batch: BatchedFeMessage,
|
||||
io_concurrency: IoConcurrency,
|
||||
cancel: &CancellationToken,
|
||||
protocol_version: PagestreamProtocolVersion,
|
||||
ctx: &RequestContext,
|
||||
@@ -1076,7 +969,7 @@ impl PageServerHandler {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::exists");
|
||||
(
|
||||
vec![self
|
||||
.handle_get_rel_exists_request(&*shard.upgrade()?, &req, ctx)
|
||||
.handle_get_rel_exists_request(&shard, &req, ctx)
|
||||
.instrument(span.clone())
|
||||
.await
|
||||
.map(|msg| (msg, timer))
|
||||
@@ -1093,7 +986,7 @@ impl PageServerHandler {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::nblocks");
|
||||
(
|
||||
vec![self
|
||||
.handle_get_nblocks_request(&*shard.upgrade()?, &req, ctx)
|
||||
.handle_get_nblocks_request(&shard, &req, ctx)
|
||||
.instrument(span.clone())
|
||||
.await
|
||||
.map(|msg| (msg, timer))
|
||||
@@ -1114,10 +1007,9 @@ impl PageServerHandler {
|
||||
trace!(npages, "handling getpage request");
|
||||
let res = self
|
||||
.handle_get_page_at_lsn_request_batched(
|
||||
&*shard.upgrade()?,
|
||||
&shard,
|
||||
effective_request_lsn,
|
||||
pages,
|
||||
io_concurrency,
|
||||
ctx,
|
||||
)
|
||||
.instrument(span.clone())
|
||||
@@ -1137,7 +1029,7 @@ impl PageServerHandler {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::dbsize");
|
||||
(
|
||||
vec![self
|
||||
.handle_db_size_request(&*shard.upgrade()?, &req, ctx)
|
||||
.handle_db_size_request(&shard, &req, ctx)
|
||||
.instrument(span.clone())
|
||||
.await
|
||||
.map(|msg| (msg, timer))
|
||||
@@ -1154,7 +1046,7 @@ impl PageServerHandler {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::slrusegment");
|
||||
(
|
||||
vec![self
|
||||
.handle_get_slru_segment_request(&*shard.upgrade()?, &req, ctx)
|
||||
.handle_get_slru_segment_request(&shard, &req, ctx)
|
||||
.instrument(span.clone())
|
||||
.await
|
||||
.map(|msg| (msg, timer))
|
||||
@@ -1162,27 +1054,6 @@ impl PageServerHandler {
|
||||
span,
|
||||
)
|
||||
}
|
||||
#[cfg(feature = "testing")]
|
||||
BatchedFeMessage::Test {
|
||||
span,
|
||||
shard,
|
||||
requests,
|
||||
} => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::test");
|
||||
(
|
||||
{
|
||||
let npages = requests.len();
|
||||
trace!(npages, "handling getpage request");
|
||||
let res = self
|
||||
.handle_test_request_batch(&*shard.upgrade()?, requests, ctx)
|
||||
.instrument(span.clone())
|
||||
.await;
|
||||
assert_eq!(res.len(), npages);
|
||||
res
|
||||
},
|
||||
span,
|
||||
)
|
||||
}
|
||||
BatchedFeMessage::RespondError { span, error } => {
|
||||
// We've already decided to respond with an error, so we don't need to
|
||||
// call the handler.
|
||||
@@ -1322,17 +1193,6 @@ impl PageServerHandler {
|
||||
}
|
||||
}
|
||||
|
||||
let io_concurrency = IoConcurrency::spawn_from_conf(
|
||||
self.conf,
|
||||
match self.gate_guard.try_clone() {
|
||||
Ok(guard) => guard,
|
||||
Err(_) => {
|
||||
info!("shutdown request received in page handler");
|
||||
return Err(QueryError::Shutdown);
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
let pgb_reader = pgb
|
||||
.split()
|
||||
.context("implementation error: split pgb into reader and writer")?;
|
||||
@@ -1354,7 +1214,6 @@ impl PageServerHandler {
|
||||
request_span,
|
||||
pipelining_config,
|
||||
protocol_version,
|
||||
io_concurrency,
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
@@ -1368,7 +1227,6 @@ impl PageServerHandler {
|
||||
timeline_handles,
|
||||
request_span,
|
||||
protocol_version,
|
||||
io_concurrency,
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
@@ -1396,7 +1254,6 @@ impl PageServerHandler {
|
||||
mut timeline_handles: TimelineHandles,
|
||||
request_span: Span,
|
||||
protocol_version: PagestreamProtocolVersion,
|
||||
io_concurrency: IoConcurrency,
|
||||
ctx: &RequestContext,
|
||||
) -> (
|
||||
(PostgresBackendReader<IO>, TimelineHandles),
|
||||
@@ -1431,14 +1288,7 @@ impl PageServerHandler {
|
||||
};
|
||||
|
||||
let err = self
|
||||
.pagesteam_handle_batched_message(
|
||||
pgb_writer,
|
||||
msg,
|
||||
io_concurrency.clone(),
|
||||
&cancel,
|
||||
protocol_version,
|
||||
ctx,
|
||||
)
|
||||
.pagesteam_handle_batched_message(pgb_writer, msg, &cancel, protocol_version, ctx)
|
||||
.await;
|
||||
match err {
|
||||
Ok(()) => {}
|
||||
@@ -1462,7 +1312,6 @@ impl PageServerHandler {
|
||||
request_span: Span,
|
||||
pipelining_config: PageServicePipeliningConfigPipelined,
|
||||
protocol_version: PagestreamProtocolVersion,
|
||||
io_concurrency: IoConcurrency,
|
||||
ctx: &RequestContext,
|
||||
) -> (
|
||||
(PostgresBackendReader<IO>, TimelineHandles),
|
||||
@@ -1606,7 +1455,6 @@ impl PageServerHandler {
|
||||
self.pagesteam_handle_batched_message(
|
||||
pgb_writer,
|
||||
batch,
|
||||
io_concurrency.clone(),
|
||||
&cancel,
|
||||
protocol_version,
|
||||
&ctx,
|
||||
@@ -1863,7 +1711,6 @@ impl PageServerHandler {
|
||||
timeline: &Timeline,
|
||||
effective_lsn: Lsn,
|
||||
requests: smallvec::SmallVec<[BatchedGetPageRequest; 1]>,
|
||||
io_concurrency: IoConcurrency,
|
||||
ctx: &RequestContext,
|
||||
) -> Vec<Result<(PagestreamBeMessage, SmgrOpTimer), BatchedPageStreamError>> {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
@@ -1890,7 +1737,6 @@ impl PageServerHandler {
|
||||
.get_rel_page_at_lsn_batched(
|
||||
requests.iter().map(|p| (&p.req.rel, &p.req.blkno)),
|
||||
effective_lsn,
|
||||
io_concurrency,
|
||||
ctx,
|
||||
)
|
||||
.await;
|
||||
@@ -1945,51 +1791,6 @@ impl PageServerHandler {
|
||||
))
|
||||
}
|
||||
|
||||
// NB: this impl mimics what we do for batched getpage requests.
|
||||
#[cfg(feature = "testing")]
|
||||
#[instrument(skip_all, fields(shard_id))]
|
||||
async fn handle_test_request_batch(
|
||||
&mut self,
|
||||
timeline: &Timeline,
|
||||
requests: Vec<BatchedTestRequest>,
|
||||
_ctx: &RequestContext,
|
||||
) -> Vec<Result<(PagestreamBeMessage, SmgrOpTimer), BatchedPageStreamError>> {
|
||||
// real requests would do something with the timeline
|
||||
let mut results = Vec::with_capacity(requests.len());
|
||||
for _req in requests.iter() {
|
||||
tokio::task::yield_now().await;
|
||||
|
||||
results.push({
|
||||
if timeline.cancel.is_cancelled() {
|
||||
Err(PageReconstructError::Cancelled)
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// TODO: avoid creating the new Vec here
|
||||
Vec::from_iter(
|
||||
requests
|
||||
.into_iter()
|
||||
.zip(results.into_iter())
|
||||
.map(|(req, res)| {
|
||||
res.map(|()| {
|
||||
(
|
||||
PagestreamBeMessage::Test(models::PagestreamTestResponse {
|
||||
req: req.req.clone(),
|
||||
}),
|
||||
req.timer,
|
||||
)
|
||||
})
|
||||
.map_err(|e| BatchedPageStreamError {
|
||||
err: PageStreamError::from(e),
|
||||
req: req.req.hdr,
|
||||
})
|
||||
}),
|
||||
)
|
||||
}
|
||||
|
||||
/// Note on "fullbackup":
|
||||
/// Full basebackups should only be used for debugging purposes.
|
||||
/// Originally, it was introduced to enable breaking storage format changes,
|
||||
@@ -2605,14 +2406,6 @@ impl From<GetActiveTimelineError> for QueryError {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<crate::tenant::timeline::handle::HandleUpgradeError> for QueryError {
|
||||
fn from(e: crate::tenant::timeline::handle::HandleUpgradeError) -> Self {
|
||||
match e {
|
||||
crate::tenant::timeline::handle::HandleUpgradeError::ShutDown => QueryError::Shutdown,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn set_tracing_field_shard_id(timeline: &Timeline) {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id();
|
||||
tracing::Span::current().record(
|
||||
|
||||
@@ -17,7 +17,6 @@ use crate::span::{
|
||||
debug_assert_current_span_has_tenant_and_timeline_id,
|
||||
debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id,
|
||||
};
|
||||
use crate::tenant::storage_layer::IoConcurrency;
|
||||
use crate::tenant::timeline::GetVectoredError;
|
||||
use anyhow::{ensure, Context};
|
||||
use bytes::{Buf, Bytes, BytesMut};
|
||||
@@ -201,7 +200,6 @@ impl Timeline {
|
||||
blknum: BlockNumber,
|
||||
version: Version<'_>,
|
||||
ctx: &RequestContext,
|
||||
io_concurrency: IoConcurrency,
|
||||
) -> Result<Bytes, PageReconstructError> {
|
||||
match version {
|
||||
Version::Lsn(effective_lsn) => {
|
||||
@@ -210,7 +208,6 @@ impl Timeline {
|
||||
.get_rel_page_at_lsn_batched(
|
||||
pages.iter().map(|(tag, blknum)| (tag, blknum)),
|
||||
effective_lsn,
|
||||
io_concurrency.clone(),
|
||||
ctx,
|
||||
)
|
||||
.await;
|
||||
@@ -249,7 +246,6 @@ impl Timeline {
|
||||
&self,
|
||||
pages: impl ExactSizeIterator<Item = (&RelTag, &BlockNumber)>,
|
||||
effective_lsn: Lsn,
|
||||
io_concurrency: IoConcurrency,
|
||||
ctx: &RequestContext,
|
||||
) -> Vec<Result<Bytes, PageReconstructError>> {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
@@ -313,10 +309,7 @@ impl Timeline {
|
||||
acc.to_keyspace()
|
||||
};
|
||||
|
||||
match self
|
||||
.get_vectored(keyspace, effective_lsn, io_concurrency, ctx)
|
||||
.await
|
||||
{
|
||||
match self.get_vectored(keyspace, effective_lsn, ctx).await {
|
||||
Ok(results) => {
|
||||
for (key, res) in results {
|
||||
let mut key_slots = keys_slots.remove(&key).unwrap().into_iter();
|
||||
@@ -896,15 +889,9 @@ impl Timeline {
|
||||
&self,
|
||||
lsn: Lsn,
|
||||
ctx: &RequestContext,
|
||||
io_concurrency: IoConcurrency,
|
||||
) -> Result<HashMap<String, Bytes>, PageReconstructError> {
|
||||
let kv = self
|
||||
.scan(
|
||||
KeySpace::single(Key::metadata_aux_key_range()),
|
||||
lsn,
|
||||
ctx,
|
||||
io_concurrency,
|
||||
)
|
||||
.scan(KeySpace::single(Key::metadata_aux_key_range()), lsn, ctx)
|
||||
.await?;
|
||||
let mut result = HashMap::new();
|
||||
let mut sz = 0;
|
||||
@@ -927,9 +914,8 @@ impl Timeline {
|
||||
&self,
|
||||
lsn: Lsn,
|
||||
ctx: &RequestContext,
|
||||
io_concurrency: IoConcurrency,
|
||||
) -> Result<(), PageReconstructError> {
|
||||
self.list_aux_files_v2(lsn, ctx, io_concurrency).await?;
|
||||
self.list_aux_files_v2(lsn, ctx).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -937,24 +923,17 @@ impl Timeline {
|
||||
&self,
|
||||
lsn: Lsn,
|
||||
ctx: &RequestContext,
|
||||
io_concurrency: IoConcurrency,
|
||||
) -> Result<HashMap<String, Bytes>, PageReconstructError> {
|
||||
self.list_aux_files_v2(lsn, ctx, io_concurrency).await
|
||||
self.list_aux_files_v2(lsn, ctx).await
|
||||
}
|
||||
|
||||
pub(crate) async fn get_replorigins(
|
||||
&self,
|
||||
lsn: Lsn,
|
||||
ctx: &RequestContext,
|
||||
io_concurrency: IoConcurrency,
|
||||
) -> Result<HashMap<RepOriginId, Lsn>, PageReconstructError> {
|
||||
let kv = self
|
||||
.scan(
|
||||
KeySpace::single(repl_origin_key_range()),
|
||||
lsn,
|
||||
ctx,
|
||||
io_concurrency,
|
||||
)
|
||||
.scan(KeySpace::single(repl_origin_key_range()), lsn, ctx)
|
||||
.await?;
|
||||
let mut result = HashMap::new();
|
||||
for (k, v) in kv {
|
||||
@@ -2425,13 +2404,13 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
|
||||
use crate::{tenant::harness::TenantHarness, DEFAULT_PG_VERSION};
|
||||
use crate::{tenant::harness::TenantShardHarness, DEFAULT_PG_VERSION};
|
||||
|
||||
/// Test a round trip of aux file updates, from DatadirModification to reading back from the Timeline
|
||||
#[tokio::test]
|
||||
async fn aux_files_round_trip() -> anyhow::Result<()> {
|
||||
let name = "aux_files_round_trip";
|
||||
let harness = TenantHarness::create(name).await?;
|
||||
let harness = TenantShardHarness::create(name).await?;
|
||||
|
||||
pub const TIMELINE_ID: TimelineId =
|
||||
TimelineId::from_array(hex!("11223344556677881122334455667788"));
|
||||
@@ -2453,11 +2432,7 @@ mod tests {
|
||||
("foo/bar2".to_string(), Bytes::from_static(b"content2")),
|
||||
]);
|
||||
|
||||
let io_concurrency = IoConcurrency::spawn_for_test();
|
||||
|
||||
let readback = tline
|
||||
.list_aux_files(Lsn(0x1008), &ctx, io_concurrency.clone())
|
||||
.await?;
|
||||
let readback = tline.list_aux_files(Lsn(0x1008), &ctx).await?;
|
||||
assert_eq!(readback, expect_1008);
|
||||
|
||||
// Second modification: update one key, remove the other
|
||||
@@ -2469,15 +2444,11 @@ mod tests {
|
||||
let expect_2008 =
|
||||
HashMap::from([("foo/bar1".to_string(), Bytes::from_static(b"content3"))]);
|
||||
|
||||
let readback = tline
|
||||
.list_aux_files(Lsn(0x2008), &ctx, io_concurrency.clone())
|
||||
.await?;
|
||||
let readback = tline.list_aux_files(Lsn(0x2008), &ctx).await?;
|
||||
assert_eq!(readback, expect_2008);
|
||||
|
||||
// Reading back in time works
|
||||
let readback = tline
|
||||
.list_aux_files(Lsn(0x1008), &ctx, io_concurrency.clone())
|
||||
.await?;
|
||||
let readback = tline.list_aux_files(Lsn(0x1008), &ctx).await?;
|
||||
assert_eq!(readback, expect_1008);
|
||||
|
||||
Ok(())
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -281,14 +281,6 @@ pub struct TenantConfOpt {
|
||||
#[serde(default)]
|
||||
pub compaction_algorithm: Option<CompactionAlgorithmSettings>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(default)]
|
||||
pub l0_flush_delay_threshold: Option<usize>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(default)]
|
||||
pub l0_flush_stall_threshold: Option<usize>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(default)]
|
||||
pub gc_horizon: Option<u64>,
|
||||
@@ -365,18 +357,6 @@ pub struct TenantConfOpt {
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub wal_receiver_protocol_override: Option<PostgresClientProtocol>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub rel_size_v2_enabled: Option<bool>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub gc_compaction_enabled: Option<bool>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub gc_compaction_initial_threshold_kb: Option<u64>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub gc_compaction_ratio_percent: Option<u64>,
|
||||
}
|
||||
|
||||
impl TenantConfOpt {
|
||||
@@ -402,12 +382,6 @@ impl TenantConfOpt {
|
||||
.as_ref()
|
||||
.unwrap_or(&global_conf.compaction_algorithm)
|
||||
.clone(),
|
||||
l0_flush_delay_threshold: self
|
||||
.l0_flush_delay_threshold
|
||||
.or(global_conf.l0_flush_delay_threshold),
|
||||
l0_flush_stall_threshold: self
|
||||
.l0_flush_stall_threshold
|
||||
.or(global_conf.l0_flush_stall_threshold),
|
||||
gc_horizon: self.gc_horizon.unwrap_or(global_conf.gc_horizon),
|
||||
gc_period: self.gc_period.unwrap_or(global_conf.gc_period),
|
||||
image_creation_threshold: self
|
||||
@@ -451,16 +425,6 @@ impl TenantConfOpt {
|
||||
wal_receiver_protocol_override: self
|
||||
.wal_receiver_protocol_override
|
||||
.or(global_conf.wal_receiver_protocol_override),
|
||||
rel_size_v2_enabled: self.rel_size_v2_enabled.or(global_conf.rel_size_v2_enabled),
|
||||
gc_compaction_enabled: self
|
||||
.gc_compaction_enabled
|
||||
.unwrap_or(global_conf.gc_compaction_enabled),
|
||||
gc_compaction_initial_threshold_kb: self
|
||||
.gc_compaction_initial_threshold_kb
|
||||
.unwrap_or(global_conf.gc_compaction_initial_threshold_kb),
|
||||
gc_compaction_ratio_percent: self
|
||||
.gc_compaction_ratio_percent
|
||||
.unwrap_or(global_conf.gc_compaction_ratio_percent),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -472,8 +436,6 @@ impl TenantConfOpt {
|
||||
mut compaction_period,
|
||||
mut compaction_threshold,
|
||||
mut compaction_algorithm,
|
||||
mut l0_flush_delay_threshold,
|
||||
mut l0_flush_stall_threshold,
|
||||
mut gc_horizon,
|
||||
mut gc_period,
|
||||
mut image_creation_threshold,
|
||||
@@ -492,10 +454,6 @@ impl TenantConfOpt {
|
||||
mut lsn_lease_length_for_ts,
|
||||
mut timeline_offloading,
|
||||
mut wal_receiver_protocol_override,
|
||||
mut rel_size_v2_enabled,
|
||||
mut gc_compaction_enabled,
|
||||
mut gc_compaction_initial_threshold_kb,
|
||||
mut gc_compaction_ratio_percent,
|
||||
} = self;
|
||||
|
||||
patch.checkpoint_distance.apply(&mut checkpoint_distance);
|
||||
@@ -512,12 +470,6 @@ impl TenantConfOpt {
|
||||
.apply(&mut compaction_period);
|
||||
patch.compaction_threshold.apply(&mut compaction_threshold);
|
||||
patch.compaction_algorithm.apply(&mut compaction_algorithm);
|
||||
patch
|
||||
.l0_flush_delay_threshold
|
||||
.apply(&mut l0_flush_delay_threshold);
|
||||
patch
|
||||
.l0_flush_stall_threshold
|
||||
.apply(&mut l0_flush_stall_threshold);
|
||||
patch.gc_horizon.apply(&mut gc_horizon);
|
||||
patch
|
||||
.gc_period
|
||||
@@ -570,16 +522,6 @@ impl TenantConfOpt {
|
||||
patch
|
||||
.wal_receiver_protocol_override
|
||||
.apply(&mut wal_receiver_protocol_override);
|
||||
patch.rel_size_v2_enabled.apply(&mut rel_size_v2_enabled);
|
||||
patch
|
||||
.gc_compaction_enabled
|
||||
.apply(&mut gc_compaction_enabled);
|
||||
patch
|
||||
.gc_compaction_initial_threshold_kb
|
||||
.apply(&mut gc_compaction_initial_threshold_kb);
|
||||
patch
|
||||
.gc_compaction_ratio_percent
|
||||
.apply(&mut gc_compaction_ratio_percent);
|
||||
|
||||
Ok(Self {
|
||||
checkpoint_distance,
|
||||
@@ -588,8 +530,6 @@ impl TenantConfOpt {
|
||||
compaction_period,
|
||||
compaction_threshold,
|
||||
compaction_algorithm,
|
||||
l0_flush_delay_threshold,
|
||||
l0_flush_stall_threshold,
|
||||
gc_horizon,
|
||||
gc_period,
|
||||
image_creation_threshold,
|
||||
@@ -608,10 +548,6 @@ impl TenantConfOpt {
|
||||
lsn_lease_length_for_ts,
|
||||
timeline_offloading,
|
||||
wal_receiver_protocol_override,
|
||||
rel_size_v2_enabled,
|
||||
gc_compaction_enabled,
|
||||
gc_compaction_initial_threshold_kb,
|
||||
gc_compaction_ratio_percent,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -647,8 +583,6 @@ impl From<TenantConfOpt> for models::TenantConfig {
|
||||
compaction_target_size: value.compaction_target_size,
|
||||
compaction_period: value.compaction_period.map(humantime),
|
||||
compaction_threshold: value.compaction_threshold,
|
||||
l0_flush_delay_threshold: value.l0_flush_delay_threshold,
|
||||
l0_flush_stall_threshold: value.l0_flush_stall_threshold,
|
||||
gc_horizon: value.gc_horizon,
|
||||
gc_period: value.gc_period.map(humantime),
|
||||
image_creation_threshold: value.image_creation_threshold,
|
||||
@@ -669,10 +603,6 @@ impl From<TenantConfOpt> for models::TenantConfig {
|
||||
lsn_lease_length_for_ts: value.lsn_lease_length_for_ts.map(humantime),
|
||||
timeline_offloading: value.timeline_offloading,
|
||||
wal_receiver_protocol_override: value.wal_receiver_protocol_override,
|
||||
rel_size_v2_enabled: value.rel_size_v2_enabled,
|
||||
gc_compaction_enabled: value.gc_compaction_enabled,
|
||||
gc_compaction_initial_threshold_kb: value.gc_compaction_initial_threshold_kb,
|
||||
gc_compaction_ratio_percent: value.gc_compaction_ratio_percent,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -57,7 +57,6 @@ use std::collections::{HashMap, VecDeque};
|
||||
use std::iter::Peekable;
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::watch;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use historic_layer_coverage::BufferedHistoricLayerCoverage;
|
||||
@@ -68,6 +67,7 @@ use super::storage_layer::{LayerVisibilityHint, PersistentLayerDesc};
|
||||
///
|
||||
/// LayerMap tracks what layers exist on a timeline.
|
||||
///
|
||||
#[derive(Default)]
|
||||
pub struct LayerMap {
|
||||
//
|
||||
// 'open_layer' holds the current InMemoryLayer that is accepting new
|
||||
@@ -93,25 +93,7 @@ pub struct LayerMap {
|
||||
|
||||
/// L0 layers have key range Key::MIN..Key::MAX, and locating them using R-Tree search is very inefficient.
|
||||
/// So L0 layers are held in l0_delta_layers vector, in addition to the R-tree.
|
||||
///
|
||||
/// NB: make sure to notify `watch_l0_deltas` on changes.
|
||||
l0_delta_layers: Vec<Arc<PersistentLayerDesc>>,
|
||||
|
||||
/// Notifies about L0 delta layer changes, sending the current number of L0 layers.
|
||||
watch_l0_deltas: watch::Sender<usize>,
|
||||
}
|
||||
|
||||
impl Default for LayerMap {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
open_layer: Default::default(),
|
||||
next_open_layer_at: Default::default(),
|
||||
frozen_layers: Default::default(),
|
||||
historic: Default::default(),
|
||||
l0_delta_layers: Default::default(),
|
||||
watch_l0_deltas: watch::channel(0).0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The primary update API for the layer map.
|
||||
@@ -484,8 +466,6 @@ impl LayerMap {
|
||||
|
||||
if Self::is_l0(&layer_desc.key_range, layer_desc.is_delta) {
|
||||
self.l0_delta_layers.push(layer_desc.clone().into());
|
||||
self.watch_l0_deltas
|
||||
.send_replace(self.l0_delta_layers.len());
|
||||
}
|
||||
|
||||
self.historic.insert(
|
||||
@@ -508,8 +488,6 @@ impl LayerMap {
|
||||
let mut l0_delta_layers = std::mem::take(&mut self.l0_delta_layers);
|
||||
l0_delta_layers.retain(|other| other.key() != layer_key);
|
||||
self.l0_delta_layers = l0_delta_layers;
|
||||
self.watch_l0_deltas
|
||||
.send_replace(self.l0_delta_layers.len());
|
||||
// this assertion is related to use of Arc::ptr_eq in Self::compare_arced_layers,
|
||||
// there's a chance that the comparison fails at runtime due to it comparing (pointer,
|
||||
// vtable) pairs.
|
||||
@@ -872,11 +850,6 @@ impl LayerMap {
|
||||
&self.l0_delta_layers
|
||||
}
|
||||
|
||||
/// Subscribes to L0 delta layer changes, sending the current number of L0 delta layers.
|
||||
pub fn watch_level0_deltas(&self) -> watch::Receiver<usize> {
|
||||
self.watch_l0_deltas.subscribe()
|
||||
}
|
||||
|
||||
/// debugging function to print out the contents of the layer map
|
||||
#[allow(unused)]
|
||||
pub async fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()> {
|
||||
|
||||
@@ -44,7 +44,9 @@ use crate::tenant::config::{
|
||||
use crate::tenant::span::debug_assert_current_span_has_tenant_id;
|
||||
use crate::tenant::storage_layer::inmemory_layer;
|
||||
use crate::tenant::timeline::ShutdownMode;
|
||||
use crate::tenant::{AttachedTenantConf, GcError, LoadConfigError, SpawnMode, Tenant, TenantState};
|
||||
use crate::tenant::{
|
||||
AttachedTenantConf, GcError, LoadConfigError, SpawnMode, TenantShard, TenantState,
|
||||
};
|
||||
use crate::virtual_file::MaybeFatalIo;
|
||||
use crate::{InitializationOrder, TEMP_FILE_SUFFIX};
|
||||
|
||||
@@ -69,7 +71,7 @@ use super::{GlobalShutDown, TenantSharedResources};
|
||||
/// having a properly acquired generation (Secondary doesn't need a generation)
|
||||
#[derive(Clone)]
|
||||
pub(crate) enum TenantSlot {
|
||||
Attached(Arc<Tenant>),
|
||||
Attached(Arc<TenantShard>),
|
||||
Secondary(Arc<SecondaryTenant>),
|
||||
/// In this state, other administrative operations acting on the TenantId should
|
||||
/// block, or return a retry indicator equivalent to HTTP 503.
|
||||
@@ -88,7 +90,7 @@ impl std::fmt::Debug for TenantSlot {
|
||||
|
||||
impl TenantSlot {
|
||||
/// Return the `Tenant` in this slot if attached, else None
|
||||
fn get_attached(&self) -> Option<&Arc<Tenant>> {
|
||||
fn get_attached(&self) -> Option<&Arc<TenantShard>> {
|
||||
match self {
|
||||
Self::Attached(t) => Some(t),
|
||||
Self::Secondary(_) => None,
|
||||
@@ -99,13 +101,13 @@ impl TenantSlot {
|
||||
|
||||
/// The tenants known to the pageserver.
|
||||
/// The enum variants are used to distinguish the different states that the pageserver can be in.
|
||||
pub(crate) enum TenantsMap {
|
||||
pub(crate) enum TenantShardMap {
|
||||
/// [`init_tenant_mgr`] is not done yet.
|
||||
Initializing,
|
||||
/// [`init_tenant_mgr`] is done, all on-disk tenants have been loaded.
|
||||
/// New tenants can be added using [`tenant_map_acquire_slot`].
|
||||
Open(BTreeMap<TenantShardId, TenantSlot>),
|
||||
/// The pageserver has entered shutdown mode via [`TenantManager::shutdown`].
|
||||
/// The pageserver has entered shutdown mode via [`TenantShardManager::shutdown`].
|
||||
/// Existing tenants are still accessible, but no new tenants can be created.
|
||||
ShuttingDown(BTreeMap<TenantShardId, TenantSlot>),
|
||||
}
|
||||
@@ -166,19 +168,19 @@ impl TenantStartupMode {
|
||||
/// Result type for looking up a TenantId to a specific shard
|
||||
pub(crate) enum ShardResolveResult {
|
||||
NotFound,
|
||||
Found(Arc<Tenant>),
|
||||
Found(Arc<TenantShard>),
|
||||
// Wait for this barrrier, then query again
|
||||
InProgress(utils::completion::Barrier),
|
||||
}
|
||||
|
||||
impl TenantsMap {
|
||||
impl TenantShardMap {
|
||||
/// Convenience function for typical usage, where we want to get a `Tenant` object, for
|
||||
/// working with attached tenants. If the TenantId is in the map but in Secondary state,
|
||||
/// None is returned.
|
||||
pub(crate) fn get(&self, tenant_shard_id: &TenantShardId) -> Option<&Arc<Tenant>> {
|
||||
pub(crate) fn get(&self, tenant_shard_id: &TenantShardId) -> Option<&Arc<TenantShard>> {
|
||||
match self {
|
||||
TenantsMap::Initializing => None,
|
||||
TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => {
|
||||
TenantShardMap::Initializing => None,
|
||||
TenantShardMap::Open(m) | TenantShardMap::ShuttingDown(m) => {
|
||||
m.get(tenant_shard_id).and_then(|slot| slot.get_attached())
|
||||
}
|
||||
}
|
||||
@@ -187,8 +189,8 @@ impl TenantsMap {
|
||||
#[cfg(all(debug_assertions, not(test)))]
|
||||
pub(crate) fn len(&self) -> usize {
|
||||
match self {
|
||||
TenantsMap::Initializing => 0,
|
||||
TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => m.len(),
|
||||
TenantShardMap::Initializing => 0,
|
||||
TenantShardMap::Open(m) | TenantShardMap::ShuttingDown(m) => m.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -284,23 +286,23 @@ impl BackgroundPurges {
|
||||
}
|
||||
}
|
||||
|
||||
static TENANTS: Lazy<std::sync::RwLock<TenantsMap>> =
|
||||
Lazy::new(|| std::sync::RwLock::new(TenantsMap::Initializing));
|
||||
static TENANTS: Lazy<std::sync::RwLock<TenantShardMap>> =
|
||||
Lazy::new(|| std::sync::RwLock::new(TenantShardMap::Initializing));
|
||||
|
||||
/// Responsible for storing and mutating the collection of all tenants
|
||||
/// that this pageserver has state for.
|
||||
///
|
||||
/// Every Tenant and SecondaryTenant instance lives inside the TenantManager.
|
||||
/// Every TenantShard and SecondaryTenant instance lives inside the TenantShardManager.
|
||||
///
|
||||
/// The most important role of the TenantManager is to prevent conflicts: e.g. trying to attach
|
||||
/// The most important role of the TenantShardManager is to prevent conflicts: e.g. trying to attach
|
||||
/// the same tenant twice concurrently, or trying to configure the same tenant into secondary
|
||||
/// and attached modes concurrently.
|
||||
pub struct TenantManager {
|
||||
pub struct TenantShardManager {
|
||||
conf: &'static PageServerConf,
|
||||
// TODO: currently this is a &'static pointing to TENANTs. When we finish refactoring
|
||||
// out of that static variable, the TenantManager can own this.
|
||||
// See https://github.com/neondatabase/neon/issues/5796
|
||||
tenants: &'static std::sync::RwLock<TenantsMap>,
|
||||
tenants: &'static std::sync::RwLock<TenantShardMap>,
|
||||
resources: TenantSharedResources,
|
||||
|
||||
// Long-running operations that happen outside of a [`Tenant`] lifetime should respect this token.
|
||||
@@ -412,7 +414,7 @@ fn load_tenant_config(
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Tenant::load_tenant_config(conf, &tenant_shard_id))
|
||||
Some(TenantShard::load_tenant_config(conf, &tenant_shard_id))
|
||||
}
|
||||
|
||||
/// Initial stage of load: walk the local tenants directory, clean up any temp files,
|
||||
@@ -491,7 +493,7 @@ pub async fn init_tenant_mgr(
|
||||
resources: TenantSharedResources,
|
||||
init_order: InitializationOrder,
|
||||
cancel: CancellationToken,
|
||||
) -> anyhow::Result<TenantManager> {
|
||||
) -> anyhow::Result<TenantShardManager> {
|
||||
let mut tenants = BTreeMap::new();
|
||||
|
||||
let ctx = RequestContext::todo_child(TaskKind::Startup, DownloadBehavior::Warn);
|
||||
@@ -606,7 +608,8 @@ pub async fn init_tenant_mgr(
|
||||
// Presence of a generation number implies attachment: attach the tenant
|
||||
// if it wasn't already, and apply the generation number.
|
||||
config_write_futs.push(async move {
|
||||
let r = Tenant::persist_tenant_config(conf, &tenant_shard_id, &location_conf).await;
|
||||
let r =
|
||||
TenantShard::persist_tenant_config(conf, &tenant_shard_id, &location_conf).await;
|
||||
(tenant_shard_id, location_conf, r)
|
||||
});
|
||||
}
|
||||
@@ -669,11 +672,11 @@ pub async fn init_tenant_mgr(
|
||||
info!("Processed {} local tenants at startup", tenants.len());
|
||||
|
||||
let mut tenants_map = TENANTS.write().unwrap();
|
||||
assert!(matches!(&*tenants_map, &TenantsMap::Initializing));
|
||||
assert!(matches!(&*tenants_map, &TenantShardMap::Initializing));
|
||||
|
||||
*tenants_map = TenantsMap::Open(tenants);
|
||||
*tenants_map = TenantShardMap::Open(tenants);
|
||||
|
||||
Ok(TenantManager {
|
||||
Ok(TenantShardManager {
|
||||
conf,
|
||||
tenants: &TENANTS,
|
||||
resources,
|
||||
@@ -682,7 +685,7 @@ pub async fn init_tenant_mgr(
|
||||
})
|
||||
}
|
||||
|
||||
/// Wrapper for Tenant::spawn that checks invariants before running
|
||||
/// Wrapper for TenantShard::spawn that checks invariants before running
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn tenant_spawn(
|
||||
conf: &'static PageServerConf,
|
||||
@@ -694,7 +697,7 @@ fn tenant_spawn(
|
||||
init_order: Option<InitializationOrder>,
|
||||
mode: SpawnMode,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Arc<Tenant>, GlobalShutDown> {
|
||||
) -> Result<Arc<TenantShard>, GlobalShutDown> {
|
||||
// All these conditions should have been satisfied by our caller: the tenant dir exists, is a well formed
|
||||
// path, and contains a configuration file. Assertions that do synchronous I/O are limited to debug mode
|
||||
// to avoid impacting prod runtime performance.
|
||||
@@ -705,7 +708,7 @@ fn tenant_spawn(
|
||||
.try_exists()
|
||||
.unwrap());
|
||||
|
||||
Tenant::spawn(
|
||||
TenantShard::spawn(
|
||||
conf,
|
||||
tenant_shard_id,
|
||||
resources,
|
||||
@@ -717,7 +720,7 @@ fn tenant_spawn(
|
||||
)
|
||||
}
|
||||
|
||||
async fn shutdown_all_tenants0(tenants: &std::sync::RwLock<TenantsMap>) {
|
||||
async fn shutdown_all_tenants0(tenants: &std::sync::RwLock<TenantShardMap>) {
|
||||
let mut join_set = JoinSet::new();
|
||||
|
||||
#[cfg(all(debug_assertions, not(test)))]
|
||||
@@ -732,12 +735,12 @@ async fn shutdown_all_tenants0(tenants: &std::sync::RwLock<TenantsMap>) {
|
||||
let (total_in_progress, total_attached) = {
|
||||
let mut m = tenants.write().unwrap();
|
||||
match &mut *m {
|
||||
TenantsMap::Initializing => {
|
||||
*m = TenantsMap::ShuttingDown(BTreeMap::default());
|
||||
TenantShardMap::Initializing => {
|
||||
*m = TenantShardMap::ShuttingDown(BTreeMap::default());
|
||||
info!("tenants map is empty");
|
||||
return;
|
||||
}
|
||||
TenantsMap::Open(tenants) => {
|
||||
TenantShardMap::Open(tenants) => {
|
||||
let mut shutdown_state = BTreeMap::new();
|
||||
let mut total_in_progress = 0;
|
||||
let mut total_attached = 0;
|
||||
@@ -787,10 +790,10 @@ async fn shutdown_all_tenants0(tenants: &std::sync::RwLock<TenantsMap>) {
|
||||
}
|
||||
}
|
||||
}
|
||||
*m = TenantsMap::ShuttingDown(shutdown_state);
|
||||
*m = TenantShardMap::ShuttingDown(shutdown_state);
|
||||
(total_in_progress, total_attached)
|
||||
}
|
||||
TenantsMap::ShuttingDown(_) => {
|
||||
TenantShardMap::ShuttingDown(_) => {
|
||||
error!("already shutting down, this function isn't supposed to be called more than once");
|
||||
return;
|
||||
}
|
||||
@@ -870,7 +873,7 @@ pub(crate) enum UpsertLocationError {
|
||||
InternalError(anyhow::Error),
|
||||
}
|
||||
|
||||
impl TenantManager {
|
||||
impl TenantShardManager {
|
||||
/// Convenience function so that anyone with a TenantManager can get at the global configuration, without
|
||||
/// having to pass it around everywhere as a separate object.
|
||||
pub(crate) fn get_conf(&self) -> &'static PageServerConf {
|
||||
@@ -881,11 +884,11 @@ impl TenantManager {
|
||||
/// undergoing a state change (i.e. slot is InProgress).
|
||||
///
|
||||
/// The return Tenant is not guaranteed to be active: check its status after obtaing it, or
|
||||
/// use [`Tenant::wait_to_become_active`] before using it if you will do I/O on it.
|
||||
/// use [`TenantShard::wait_to_become_active`] before using it if you will do I/O on it.
|
||||
pub(crate) fn get_attached_tenant_shard(
|
||||
&self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
) -> Result<Arc<Tenant>, GetTenantError> {
|
||||
) -> Result<Arc<TenantShard>, GetTenantError> {
|
||||
let locked = self.tenants.read().unwrap();
|
||||
|
||||
let peek_slot = tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Read)?;
|
||||
@@ -934,12 +937,12 @@ impl TenantManager {
|
||||
flush: Option<Duration>,
|
||||
mut spawn_mode: SpawnMode,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Option<Arc<Tenant>>, UpsertLocationError> {
|
||||
) -> Result<Option<Arc<TenantShard>>, UpsertLocationError> {
|
||||
debug_assert_current_span_has_tenant_id();
|
||||
info!("configuring tenant location to state {new_location_config:?}");
|
||||
|
||||
enum FastPathModified {
|
||||
Attached(Arc<Tenant>),
|
||||
Attached(Arc<TenantShard>),
|
||||
Secondary(Arc<SecondaryTenant>),
|
||||
}
|
||||
|
||||
@@ -996,9 +999,13 @@ impl TenantManager {
|
||||
// phase of writing config and/or waiting for flush, before returning.
|
||||
match fast_path_taken {
|
||||
Some(FastPathModified::Attached(tenant)) => {
|
||||
Tenant::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config)
|
||||
.await
|
||||
.fatal_err("write tenant shard config");
|
||||
TenantShard::persist_tenant_config(
|
||||
self.conf,
|
||||
&tenant_shard_id,
|
||||
&new_location_config,
|
||||
)
|
||||
.await
|
||||
.fatal_err("write tenant shard config");
|
||||
|
||||
// Transition to AttachedStale means we may well hold a valid generation
|
||||
// still, and have been requested to go stale as part of a migration. If
|
||||
@@ -1027,9 +1034,13 @@ impl TenantManager {
|
||||
return Ok(Some(tenant));
|
||||
}
|
||||
Some(FastPathModified::Secondary(_secondary_tenant)) => {
|
||||
Tenant::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config)
|
||||
.await
|
||||
.fatal_err("write tenant shard config");
|
||||
TenantShard::persist_tenant_config(
|
||||
self.conf,
|
||||
&tenant_shard_id,
|
||||
&new_location_config,
|
||||
)
|
||||
.await
|
||||
.fatal_err("write tenant shard config");
|
||||
|
||||
return Ok(None);
|
||||
}
|
||||
@@ -1119,7 +1130,7 @@ impl TenantManager {
|
||||
// Before activating either secondary or attached mode, persist the
|
||||
// configuration, so that on restart we will re-attach (or re-start
|
||||
// secondary) on the tenant.
|
||||
Tenant::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config)
|
||||
TenantShard::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config)
|
||||
.await
|
||||
.fatal_err("write tenant shard config");
|
||||
|
||||
@@ -1257,7 +1268,7 @@ impl TenantManager {
|
||||
|
||||
let tenant_path = self.conf.tenant_path(&tenant_shard_id);
|
||||
let timelines_path = self.conf.timelines_path(&tenant_shard_id);
|
||||
let config = Tenant::load_tenant_config(self.conf, &tenant_shard_id)?;
|
||||
let config = TenantShard::load_tenant_config(self.conf, &tenant_shard_id)?;
|
||||
|
||||
if drop_cache {
|
||||
tracing::info!("Dropping local file cache");
|
||||
@@ -1292,11 +1303,11 @@ impl TenantManager {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_attached_active_tenant_shards(&self) -> Vec<Arc<Tenant>> {
|
||||
pub(crate) fn get_attached_active_tenant_shards(&self) -> Vec<Arc<TenantShard>> {
|
||||
let locked = self.tenants.read().unwrap();
|
||||
match &*locked {
|
||||
TenantsMap::Initializing => Vec::new(),
|
||||
TenantsMap::Open(map) | TenantsMap::ShuttingDown(map) => map
|
||||
TenantShardMap::Initializing => Vec::new(),
|
||||
TenantShardMap::Open(map) | TenantShardMap::ShuttingDown(map) => map
|
||||
.values()
|
||||
.filter_map(|slot| {
|
||||
slot.get_attached()
|
||||
@@ -1316,8 +1327,8 @@ impl TenantManager {
|
||||
let locked = self.tenants.read().unwrap();
|
||||
|
||||
let map = match &*locked {
|
||||
TenantsMap::Initializing | TenantsMap::ShuttingDown(_) => return,
|
||||
TenantsMap::Open(m) => m,
|
||||
TenantShardMap::Initializing | TenantShardMap::ShuttingDown(_) => return,
|
||||
TenantShardMap::Open(m) => m,
|
||||
};
|
||||
|
||||
for (tenant_id, slot) in map {
|
||||
@@ -1334,8 +1345,8 @@ impl TenantManager {
|
||||
pub(crate) fn list(&self) -> Vec<(TenantShardId, TenantSlot)> {
|
||||
let locked = self.tenants.read().unwrap();
|
||||
match &*locked {
|
||||
TenantsMap::Initializing => Vec::new(),
|
||||
TenantsMap::Open(map) | TenantsMap::ShuttingDown(map) => {
|
||||
TenantShardMap::Initializing => Vec::new(),
|
||||
TenantShardMap::Open(map) | TenantShardMap::ShuttingDown(map) => {
|
||||
map.iter().map(|(k, v)| (*k, v.clone())).collect()
|
||||
}
|
||||
}
|
||||
@@ -1344,8 +1355,8 @@ impl TenantManager {
|
||||
pub(crate) fn get(&self, tenant_shard_id: TenantShardId) -> Option<TenantSlot> {
|
||||
let locked = self.tenants.read().unwrap();
|
||||
match &*locked {
|
||||
TenantsMap::Initializing => None,
|
||||
TenantsMap::Open(map) | TenantsMap::ShuttingDown(map) => {
|
||||
TenantShardMap::Initializing => None,
|
||||
TenantShardMap::Open(map) | TenantShardMap::ShuttingDown(map) => {
|
||||
map.get(&tenant_shard_id).cloned()
|
||||
}
|
||||
}
|
||||
@@ -1441,7 +1452,7 @@ impl TenantManager {
|
||||
#[instrument(skip_all, fields(tenant_id=%tenant.get_tenant_shard_id().tenant_id, shard_id=%tenant.get_tenant_shard_id().shard_slug(), new_shard_count=%new_shard_count.literal()))]
|
||||
pub(crate) async fn shard_split(
|
||||
&self,
|
||||
tenant: Arc<Tenant>,
|
||||
tenant: Arc<TenantShard>,
|
||||
new_shard_count: ShardCount,
|
||||
new_stripe_size: Option<ShardStripeSize>,
|
||||
ctx: &RequestContext,
|
||||
@@ -1471,7 +1482,7 @@ impl TenantManager {
|
||||
|
||||
pub(crate) async fn do_shard_split(
|
||||
&self,
|
||||
tenant: Arc<Tenant>,
|
||||
tenant: Arc<TenantShard>,
|
||||
new_shard_count: ShardCount,
|
||||
new_stripe_size: Option<ShardStripeSize>,
|
||||
ctx: &RequestContext,
|
||||
@@ -1519,7 +1530,7 @@ impl TenantManager {
|
||||
|
||||
// Phase 1: Write out child shards' remote index files, in the parent tenant's current generation
|
||||
if let Err(e) = tenant.split_prepare(&child_shards).await {
|
||||
// If [`Tenant::split_prepare`] fails, we must reload the tenant, because it might
|
||||
// If [`TenantShard::split_prepare`] fails, we must reload the tenant, because it might
|
||||
// have been left in a partially-shut-down state.
|
||||
tracing::warn!("Failed to prepare for split: {e}, reloading Tenant before returning");
|
||||
return Err(e);
|
||||
@@ -1697,7 +1708,7 @@ impl TenantManager {
|
||||
/// For each resident layer in the parent shard, we will hard link it into all of the child shards.
|
||||
async fn shard_split_hardlink(
|
||||
&self,
|
||||
parent_shard: &Tenant,
|
||||
parent_shard: &TenantShard,
|
||||
child_shards: Vec<TenantShardId>,
|
||||
) -> anyhow::Result<()> {
|
||||
debug_assert_current_span_has_tenant_id();
|
||||
@@ -1888,8 +1899,8 @@ impl TenantManager {
|
||||
) -> Result<Vec<(TenantShardId, TenantState, Generation)>, TenantMapListError> {
|
||||
let tenants = self.tenants.read().unwrap();
|
||||
let m = match &*tenants {
|
||||
TenantsMap::Initializing => return Err(TenantMapListError::Initializing),
|
||||
TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => m,
|
||||
TenantShardMap::Initializing => return Err(TenantMapListError::Initializing),
|
||||
TenantShardMap::Open(m) | TenantShardMap::ShuttingDown(m) => m,
|
||||
};
|
||||
Ok(m.iter()
|
||||
.filter_map(|(id, tenant)| match tenant {
|
||||
@@ -1974,7 +1985,7 @@ impl TenantManager {
|
||||
}
|
||||
|
||||
let tenant_path = self.conf.tenant_path(&tenant_shard_id);
|
||||
let config = Tenant::load_tenant_config(self.conf, &tenant_shard_id)
|
||||
let config = TenantShard::load_tenant_config(self.conf, &tenant_shard_id)
|
||||
.map_err(|e| Error::DetachReparent(e.into()))?;
|
||||
|
||||
let shard_identity = config.shard;
|
||||
@@ -2070,8 +2081,8 @@ impl TenantManager {
|
||||
let mut any_in_progress = None;
|
||||
|
||||
match &*tenants {
|
||||
TenantsMap::Initializing => ShardResolveResult::NotFound,
|
||||
TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => {
|
||||
TenantShardMap::Initializing => ShardResolveResult::NotFound,
|
||||
TenantShardMap::Open(m) | TenantShardMap::ShuttingDown(m) => {
|
||||
for slot in m.range(TenantShardId::tenant_range(*tenant_id)) {
|
||||
// Ignore all slots that don't contain an attached tenant
|
||||
let tenant = match &slot.1 {
|
||||
@@ -2135,8 +2146,8 @@ impl TenantManager {
|
||||
pub(crate) fn calculate_utilization(&self) -> Result<(u64, u32), TenantMapListError> {
|
||||
let tenants = self.tenants.read().unwrap();
|
||||
let m = match &*tenants {
|
||||
TenantsMap::Initializing => return Err(TenantMapListError::Initializing),
|
||||
TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => m,
|
||||
TenantShardMap::Initializing => return Err(TenantMapListError::Initializing),
|
||||
TenantShardMap::Open(m) | TenantShardMap::ShuttingDown(m) => m,
|
||||
};
|
||||
let shard_count = m.len();
|
||||
let mut wanted_bytes = 0;
|
||||
@@ -2482,18 +2493,18 @@ impl SlotGuard {
|
||||
}
|
||||
|
||||
let m = match &mut *locked {
|
||||
TenantsMap::Initializing => {
|
||||
TenantShardMap::Initializing => {
|
||||
return Err(TenantSlotUpsertError::MapState(
|
||||
TenantMapError::StillInitializing,
|
||||
))
|
||||
}
|
||||
TenantsMap::ShuttingDown(_) => {
|
||||
TenantShardMap::ShuttingDown(_) => {
|
||||
return Err(TenantSlotUpsertError::ShuttingDown((
|
||||
new_value,
|
||||
self.completion.clone(),
|
||||
)));
|
||||
}
|
||||
TenantsMap::Open(m) => m,
|
||||
TenantShardMap::Open(m) => m,
|
||||
};
|
||||
|
||||
METRICS.slot_inserted(&new_value);
|
||||
@@ -2590,17 +2601,17 @@ impl Drop for SlotGuard {
|
||||
let mut locked = TENANTS.write().unwrap();
|
||||
|
||||
let m = match &mut *locked {
|
||||
TenantsMap::Initializing => {
|
||||
TenantShardMap::Initializing => {
|
||||
// There is no map, this should never happen.
|
||||
return;
|
||||
}
|
||||
TenantsMap::ShuttingDown(_) => {
|
||||
TenantShardMap::ShuttingDown(_) => {
|
||||
// When we transition to shutdown, InProgress elements are removed
|
||||
// from the map, so we do not need to clean up our Inprogress marker.
|
||||
// See [`shutdown_all_tenants0`]
|
||||
return;
|
||||
}
|
||||
TenantsMap::Open(m) => m,
|
||||
TenantShardMap::Open(m) => m,
|
||||
};
|
||||
|
||||
use std::collections::btree_map::Entry;
|
||||
@@ -2640,13 +2651,13 @@ enum TenantSlotPeekMode {
|
||||
}
|
||||
|
||||
fn tenant_map_peek_slot<'a>(
|
||||
tenants: &'a std::sync::RwLockReadGuard<'a, TenantsMap>,
|
||||
tenants: &'a std::sync::RwLockReadGuard<'a, TenantShardMap>,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
mode: TenantSlotPeekMode,
|
||||
) -> Result<Option<&'a TenantSlot>, TenantMapError> {
|
||||
match tenants.deref() {
|
||||
TenantsMap::Initializing => Err(TenantMapError::StillInitializing),
|
||||
TenantsMap::ShuttingDown(m) => match mode {
|
||||
TenantShardMap::Initializing => Err(TenantMapError::StillInitializing),
|
||||
TenantShardMap::ShuttingDown(m) => match mode {
|
||||
TenantSlotPeekMode::Read => Ok(Some(
|
||||
// When reading in ShuttingDown state, we must translate None results
|
||||
// into a ShuttingDown error, because absence of a tenant shard ID in the map
|
||||
@@ -2658,7 +2669,7 @@ fn tenant_map_peek_slot<'a>(
|
||||
)),
|
||||
TenantSlotPeekMode::Write => Err(TenantMapError::ShuttingDown),
|
||||
},
|
||||
TenantsMap::Open(m) => Ok(m.get(tenant_shard_id)),
|
||||
TenantShardMap::Open(m) => Ok(m.get(tenant_shard_id)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2678,7 +2689,7 @@ fn tenant_map_acquire_slot(
|
||||
|
||||
fn tenant_map_acquire_slot_impl(
|
||||
tenant_shard_id: &TenantShardId,
|
||||
tenants: &std::sync::RwLock<TenantsMap>,
|
||||
tenants: &std::sync::RwLock<TenantShardMap>,
|
||||
mode: TenantSlotAcquireMode,
|
||||
) -> Result<SlotGuard, TenantSlotError> {
|
||||
use TenantSlotAcquireMode::*;
|
||||
@@ -2689,9 +2700,9 @@ fn tenant_map_acquire_slot_impl(
|
||||
let _guard = span.enter();
|
||||
|
||||
let m = match &mut *locked {
|
||||
TenantsMap::Initializing => return Err(TenantMapError::StillInitializing.into()),
|
||||
TenantsMap::ShuttingDown(_) => return Err(TenantMapError::ShuttingDown.into()),
|
||||
TenantsMap::Open(m) => m,
|
||||
TenantShardMap::Initializing => return Err(TenantMapError::StillInitializing.into()),
|
||||
TenantShardMap::ShuttingDown(_) => return Err(TenantMapError::ShuttingDown.into()),
|
||||
TenantShardMap::Open(m) => m,
|
||||
};
|
||||
|
||||
use std::collections::btree_map::Entry;
|
||||
@@ -2744,7 +2755,7 @@ fn tenant_map_acquire_slot_impl(
|
||||
/// If the cleanup fails, tenant will stay in memory in [`TenantState::Broken`] state, and another removal
|
||||
/// operation would be needed to remove it.
|
||||
async fn remove_tenant_from_memory<V, F>(
|
||||
tenants: &std::sync::RwLock<TenantsMap>,
|
||||
tenants: &std::sync::RwLock<TenantShardMap>,
|
||||
tenant_shard_id: TenantShardId,
|
||||
tenant_cleanup: F,
|
||||
) -> Result<V, TenantStateError>
|
||||
@@ -2827,14 +2838,14 @@ mod tests {
|
||||
|
||||
use crate::tenant::mgr::TenantSlot;
|
||||
|
||||
use super::{super::harness::TenantHarness, TenantsMap};
|
||||
use super::{super::harness::TenantShardHarness, TenantShardMap};
|
||||
|
||||
#[tokio::test(start_paused = true)]
|
||||
async fn shutdown_awaits_in_progress_tenant() {
|
||||
// Test that if an InProgress tenant is in the map during shutdown, the shutdown will gracefully
|
||||
// wait for it to complete before proceeding.
|
||||
|
||||
let h = TenantHarness::create("shutdown_awaits_in_progress_tenant")
|
||||
let h = TenantShardHarness::create("shutdown_awaits_in_progress_tenant")
|
||||
.await
|
||||
.unwrap();
|
||||
let (t, _ctx) = h.load().await;
|
||||
@@ -2848,7 +2859,7 @@ mod tests {
|
||||
let _e = span.enter();
|
||||
|
||||
let tenants = BTreeMap::from([(id, TenantSlot::Attached(t.clone()))]);
|
||||
let tenants = Arc::new(std::sync::RwLock::new(TenantsMap::Open(tenants)));
|
||||
let tenants = Arc::new(std::sync::RwLock::new(TenantShardMap::Open(tenants)));
|
||||
|
||||
// Invoke remove_tenant_from_memory with a cleanup hook that blocks until we manually
|
||||
// permit it to proceed: that will stick the tenant in InProgress
|
||||
|
||||
@@ -133,7 +133,7 @@
|
||||
//! - Initiate upload queue with that [`IndexPart`].
|
||||
//! - Reschedule all lost operations by comparing the local filesystem state
|
||||
//! and remote state as per [`IndexPart`]. This is done in
|
||||
//! [`Tenant::timeline_init_and_sync`].
|
||||
//! [`TenantShard::timeline_init_and_sync`].
|
||||
//!
|
||||
//! Note that if we crash during file deletion between the index update
|
||||
//! that removes the file from the list of files, and deleting the remote file,
|
||||
@@ -171,7 +171,7 @@
|
||||
//! If no remote storage configuration is provided, the [`RemoteTimelineClient`] is
|
||||
//! not created and the uploads are skipped.
|
||||
//!
|
||||
//! [`Tenant::timeline_init_and_sync`]: super::Tenant::timeline_init_and_sync
|
||||
//! [`TenantShard::timeline_init_and_sync`]: super::TenantShard::timeline_init_and_sync
|
||||
//! [`Timeline::load_layer_map`]: super::Timeline::load_layer_map
|
||||
|
||||
pub(crate) mod download;
|
||||
@@ -382,12 +382,6 @@ pub(crate) struct RemoteTimelineClient {
|
||||
cancel: CancellationToken,
|
||||
}
|
||||
|
||||
impl Drop for RemoteTimelineClient {
|
||||
fn drop(&mut self) {
|
||||
debug!("dropping RemoteTimelineClient");
|
||||
}
|
||||
}
|
||||
|
||||
impl RemoteTimelineClient {
|
||||
///
|
||||
/// Create a remote storage client for given timeline
|
||||
@@ -803,12 +797,6 @@ impl RemoteTimelineClient {
|
||||
|
||||
upload_queue.dirty.metadata.apply(update);
|
||||
|
||||
// Defense in depth: if we somehow generated invalid metadata, do not persist it.
|
||||
upload_queue
|
||||
.dirty
|
||||
.validate()
|
||||
.map_err(|e| anyhow::anyhow!(e))?;
|
||||
|
||||
self.schedule_index_upload(upload_queue);
|
||||
|
||||
Ok(())
|
||||
@@ -2650,9 +2638,9 @@ mod tests {
|
||||
context::RequestContext,
|
||||
tenant::{
|
||||
config::AttachmentMode,
|
||||
harness::{TenantHarness, TIMELINE_ID},
|
||||
harness::{TenantShardHarness, TIMELINE_ID},
|
||||
storage_layer::layer::local_layer_path,
|
||||
Tenant, Timeline,
|
||||
TenantShard, Timeline,
|
||||
},
|
||||
DEFAULT_PG_VERSION,
|
||||
};
|
||||
@@ -2709,8 +2697,8 @@ mod tests {
|
||||
}
|
||||
|
||||
struct TestSetup {
|
||||
harness: TenantHarness,
|
||||
tenant: Arc<Tenant>,
|
||||
harness: TenantShardHarness,
|
||||
tenant: Arc<TenantShard>,
|
||||
timeline: Arc<Timeline>,
|
||||
tenant_ctx: RequestContext,
|
||||
}
|
||||
@@ -2718,7 +2706,7 @@ mod tests {
|
||||
impl TestSetup {
|
||||
async fn new(test_name: &str) -> anyhow::Result<Self> {
|
||||
let test_name = Box::leak(Box::new(format!("remote_timeline_client__{test_name}")));
|
||||
let harness = TenantHarness::create(test_name).await?;
|
||||
let harness = TenantShardHarness::create(test_name).await?;
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
|
||||
let timeline = tenant
|
||||
|
||||
@@ -77,32 +77,8 @@ pub struct IndexPart {
|
||||
///
|
||||
/// None means no aux files have been written to the storage before the point
|
||||
/// when this flag is introduced.
|
||||
///
|
||||
/// This flag is not used any more as all tenants have been transitioned to the new aux file policy.
|
||||
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||
pub(crate) last_aux_file_policy: Option<AuxFilePolicy>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||
pub(crate) rel_size_migration: Option<RelSizeMigration>,
|
||||
|
||||
/// The LSN of gc-compaction horizon. Once gc-compaction is finished for all layer files below an LSN, this LSN will be updated.
|
||||
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||
pub(crate) l2_lsn: Option<Lsn>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub enum RelSizeMigration {
|
||||
/// The tenant is using the old rel_size format.
|
||||
/// Note that this enum is persisted as `Option<RelSizeMigration>` in the index part, so
|
||||
/// `None` is the same as `Some(RelSizeMigration::Legacy)`.
|
||||
Legacy,
|
||||
/// The tenant is migrating to the new rel_size format. Both old and new rel_size format are
|
||||
/// persisted in the index part. The read path will read both formats and merge them.
|
||||
Migrating,
|
||||
/// The tenant has migrated to the new rel_size format. Only the new rel_size format is persisted
|
||||
/// in the index part, and the read path will not read the old format.
|
||||
Migrated,
|
||||
}
|
||||
|
||||
impl IndexPart {
|
||||
@@ -121,12 +97,10 @@ impl IndexPart {
|
||||
/// - 8: added `archived_at`
|
||||
/// - 9: +gc_blocking
|
||||
/// - 10: +import_pgdata
|
||||
/// - 11: +rel_size_migration
|
||||
/// - 12: +l2_lsn
|
||||
const LATEST_VERSION: usize = 12;
|
||||
const LATEST_VERSION: usize = 10;
|
||||
|
||||
// Versions we may see when reading from a bucket.
|
||||
pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
|
||||
pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
|
||||
|
||||
pub const FILE_NAME: &'static str = "index_part.json";
|
||||
|
||||
@@ -142,8 +116,6 @@ impl IndexPart {
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
import_pgdata: None,
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -180,21 +152,6 @@ impl IndexPart {
|
||||
};
|
||||
is_same_remote_layer_path(name, metadata, name, index_metadata)
|
||||
}
|
||||
|
||||
/// Check for invariants in the index: this is useful when uploading an index to ensure that if
|
||||
/// we encounter a bug, we do not persist buggy metadata.
|
||||
pub(crate) fn validate(&self) -> Result<(), String> {
|
||||
if self.import_pgdata.is_none()
|
||||
&& self.metadata.ancestor_timeline().is_none()
|
||||
&& self.layer_metadata.is_empty()
|
||||
{
|
||||
// Unless we're in the middle of a raw pgdata import, or this is a child timeline,the index must
|
||||
// always have at least one layer.
|
||||
return Err("Index has no ancestor and no layers".to_string());
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Metadata gathered for each of the layer files.
|
||||
@@ -444,8 +401,6 @@ mod tests {
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
import_pgdata: None,
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -491,8 +446,6 @@ mod tests {
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
import_pgdata: None,
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -539,8 +492,6 @@ mod tests {
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
import_pgdata: None,
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -590,8 +541,6 @@ mod tests {
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
import_pgdata: None,
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
};
|
||||
|
||||
let empty_layers_parsed = IndexPart::from_json_bytes(empty_layers_json.as_bytes()).unwrap();
|
||||
@@ -636,8 +585,6 @@ mod tests {
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
import_pgdata: None,
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -685,8 +632,6 @@ mod tests {
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
import_pgdata: None,
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -739,8 +684,6 @@ mod tests {
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: Some(AuxFilePolicy::V2),
|
||||
import_pgdata: None,
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -798,8 +741,6 @@ mod tests {
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: Default::default(),
|
||||
import_pgdata: None,
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -858,8 +799,6 @@ mod tests {
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: Default::default(),
|
||||
import_pgdata: None,
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -923,8 +862,6 @@ mod tests {
|
||||
last_aux_file_policy: Default::default(),
|
||||
archived_at: None,
|
||||
import_pgdata: None,
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -1000,168 +937,7 @@ mod tests {
|
||||
started_at: parse_naive_datetime("2024-11-13T09:23:42.123000000"),
|
||||
finished_at: parse_naive_datetime("2024-11-13T09:42:23.123000000"),
|
||||
idempotency_key: import_pgdata::index_part_format::IdempotencyKey::new("specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5".to_string()),
|
||||
}))),
|
||||
rel_size_migration: None,
|
||||
l2_lsn: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
assert_eq!(part, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn v11_rel_size_migration_is_parsed() {
|
||||
let example = r#"{
|
||||
"version": 11,
|
||||
"layer_metadata":{
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 },
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51": { "file_size": 9007199254741001 }
|
||||
},
|
||||
"disk_consistent_lsn":"0/16960E8",
|
||||
"metadata": {
|
||||
"disk_consistent_lsn": "0/16960E8",
|
||||
"prev_record_lsn": "0/1696070",
|
||||
"ancestor_timeline": "e45a7f37d3ee2ff17dc14bf4f4e3f52e",
|
||||
"ancestor_lsn": "0/0",
|
||||
"latest_gc_cutoff_lsn": "0/1696070",
|
||||
"initdb_lsn": "0/1696070",
|
||||
"pg_version": 14
|
||||
},
|
||||
"gc_blocking": {
|
||||
"started_at": "2024-07-19T09:00:00.123",
|
||||
"reasons": ["DetachAncestor"]
|
||||
},
|
||||
"import_pgdata": {
|
||||
"V1": {
|
||||
"Done": {
|
||||
"idempotency_key": "specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5",
|
||||
"started_at": "2024-11-13T09:23:42.123",
|
||||
"finished_at": "2024-11-13T09:42:23.123"
|
||||
}
|
||||
}
|
||||
},
|
||||
"rel_size_migration": "legacy"
|
||||
}"#;
|
||||
|
||||
let expected = IndexPart {
|
||||
version: 11,
|
||||
layer_metadata: HashMap::from([
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 25600000,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
}),
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 9007199254741001,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
metadata: TimelineMetadata::new(
|
||||
Lsn::from_str("0/16960E8").unwrap(),
|
||||
Some(Lsn::from_str("0/1696070").unwrap()),
|
||||
Some(TimelineId::from_str("e45a7f37d3ee2ff17dc14bf4f4e3f52e").unwrap()),
|
||||
Lsn::INVALID,
|
||||
Lsn::from_str("0/1696070").unwrap(),
|
||||
Lsn::from_str("0/1696070").unwrap(),
|
||||
14,
|
||||
).with_recalculated_checksum().unwrap(),
|
||||
deleted_at: None,
|
||||
lineage: Default::default(),
|
||||
gc_blocking: Some(GcBlocking {
|
||||
started_at: parse_naive_datetime("2024-07-19T09:00:00.123000000"),
|
||||
reasons: enumset::EnumSet::from_iter([GcBlockingReason::DetachAncestor]),
|
||||
}),
|
||||
last_aux_file_policy: Default::default(),
|
||||
archived_at: None,
|
||||
import_pgdata: Some(import_pgdata::index_part_format::Root::V1(import_pgdata::index_part_format::V1::Done(import_pgdata::index_part_format::Done{
|
||||
started_at: parse_naive_datetime("2024-11-13T09:23:42.123000000"),
|
||||
finished_at: parse_naive_datetime("2024-11-13T09:42:23.123000000"),
|
||||
idempotency_key: import_pgdata::index_part_format::IdempotencyKey::new("specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5".to_string()),
|
||||
}))),
|
||||
rel_size_migration: Some(RelSizeMigration::Legacy),
|
||||
l2_lsn: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
assert_eq!(part, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn v12_l2_lsn_is_parsed() {
|
||||
let example = r#"{
|
||||
"version": 12,
|
||||
"layer_metadata":{
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 },
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51": { "file_size": 9007199254741001 }
|
||||
},
|
||||
"disk_consistent_lsn":"0/16960E8",
|
||||
"metadata": {
|
||||
"disk_consistent_lsn": "0/16960E8",
|
||||
"prev_record_lsn": "0/1696070",
|
||||
"ancestor_timeline": "e45a7f37d3ee2ff17dc14bf4f4e3f52e",
|
||||
"ancestor_lsn": "0/0",
|
||||
"latest_gc_cutoff_lsn": "0/1696070",
|
||||
"initdb_lsn": "0/1696070",
|
||||
"pg_version": 14
|
||||
},
|
||||
"gc_blocking": {
|
||||
"started_at": "2024-07-19T09:00:00.123",
|
||||
"reasons": ["DetachAncestor"]
|
||||
},
|
||||
"import_pgdata": {
|
||||
"V1": {
|
||||
"Done": {
|
||||
"idempotency_key": "specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5",
|
||||
"started_at": "2024-11-13T09:23:42.123",
|
||||
"finished_at": "2024-11-13T09:42:23.123"
|
||||
}
|
||||
}
|
||||
},
|
||||
"rel_size_migration": "legacy",
|
||||
"l2_lsn": "0/16960E8"
|
||||
}"#;
|
||||
|
||||
let expected = IndexPart {
|
||||
version: 12,
|
||||
layer_metadata: HashMap::from([
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 25600000,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
}),
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 9007199254741001,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
metadata: TimelineMetadata::new(
|
||||
Lsn::from_str("0/16960E8").unwrap(),
|
||||
Some(Lsn::from_str("0/1696070").unwrap()),
|
||||
Some(TimelineId::from_str("e45a7f37d3ee2ff17dc14bf4f4e3f52e").unwrap()),
|
||||
Lsn::INVALID,
|
||||
Lsn::from_str("0/1696070").unwrap(),
|
||||
Lsn::from_str("0/1696070").unwrap(),
|
||||
14,
|
||||
).with_recalculated_checksum().unwrap(),
|
||||
deleted_at: None,
|
||||
lineage: Default::default(),
|
||||
gc_blocking: Some(GcBlocking {
|
||||
started_at: parse_naive_datetime("2024-07-19T09:00:00.123000000"),
|
||||
reasons: enumset::EnumSet::from_iter([GcBlockingReason::DetachAncestor]),
|
||||
}),
|
||||
last_aux_file_policy: Default::default(),
|
||||
archived_at: None,
|
||||
import_pgdata: Some(import_pgdata::index_part_format::Root::V1(import_pgdata::index_part_format::V1::Done(import_pgdata::index_part_format::Done{
|
||||
started_at: parse_naive_datetime("2024-11-13T09:23:42.123000000"),
|
||||
finished_at: parse_naive_datetime("2024-11-13T09:42:23.123000000"),
|
||||
idempotency_key: import_pgdata::index_part_format::IdempotencyKey::new("specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5".to_string()),
|
||||
}))),
|
||||
rel_size_migration: Some(RelSizeMigration::Legacy),
|
||||
l2_lsn: Some("0/16960E8".parse::<Lsn>().unwrap()),
|
||||
})))
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
|
||||
@@ -40,10 +40,6 @@ pub(crate) async fn upload_index_part(
|
||||
});
|
||||
pausable_failpoint!("before-upload-index-pausable");
|
||||
|
||||
// Safety: refuse to persist invalid index metadata, to mitigate the impact of any bug that produces this
|
||||
// (this should never happen)
|
||||
index_part.validate().map_err(|e| anyhow::anyhow!(e))?;
|
||||
|
||||
// FIXME: this error comes too late
|
||||
let serialized = index_part.to_json_bytes()?;
|
||||
let serialized = Bytes::from(serialized);
|
||||
|
||||
@@ -19,7 +19,7 @@ use self::{
|
||||
|
||||
use super::{
|
||||
config::{SecondaryLocationConfig, TenantConfOpt},
|
||||
mgr::TenantManager,
|
||||
mgr::TenantShardManager,
|
||||
span::debug_assert_current_span_has_tenant_id,
|
||||
storage_layer::LayerName,
|
||||
GetTenantError,
|
||||
@@ -374,7 +374,7 @@ impl GlobalTasks {
|
||||
}
|
||||
|
||||
pub fn spawn_tasks(
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
tenant_manager: Arc<TenantShardManager>,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
background_jobs_can_start: Barrier,
|
||||
cancel: CancellationToken,
|
||||
|
||||
@@ -39,7 +39,7 @@ use super::{
|
||||
};
|
||||
|
||||
use crate::tenant::{
|
||||
mgr::TenantManager,
|
||||
mgr::TenantShardManager,
|
||||
remote_timeline_client::{download::download_layer_file, remote_heatmap_path},
|
||||
};
|
||||
|
||||
@@ -69,7 +69,7 @@ use super::{
|
||||
const DEFAULT_DOWNLOAD_INTERVAL: Duration = Duration::from_millis(60000);
|
||||
|
||||
pub(super) async fn downloader_task(
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
tenant_manager: Arc<TenantShardManager>,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
command_queue: tokio::sync::mpsc::Receiver<CommandRequest<DownloadCommand>>,
|
||||
background_jobs_can_start: Barrier,
|
||||
@@ -92,7 +92,7 @@ pub(super) async fn downloader_task(
|
||||
}
|
||||
|
||||
struct SecondaryDownloader {
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
tenant_manager: Arc<TenantShardManager>,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
root_ctx: RequestContext,
|
||||
}
|
||||
|
||||
@@ -10,11 +10,11 @@ use crate::{
|
||||
tenant::{
|
||||
config::AttachmentMode,
|
||||
mgr::GetTenantError,
|
||||
mgr::TenantManager,
|
||||
mgr::TenantShardManager,
|
||||
remote_timeline_client::remote_heatmap_path,
|
||||
span::debug_assert_current_span_has_tenant_id,
|
||||
tasks::{warn_when_period_overrun, BackgroundLoopKind},
|
||||
Tenant,
|
||||
TenantShard,
|
||||
},
|
||||
};
|
||||
|
||||
@@ -35,7 +35,7 @@ use tracing::{info_span, instrument, Instrument};
|
||||
use utils::{backoff, completion::Barrier, yielding_loop::yielding_loop};
|
||||
|
||||
pub(super) async fn heatmap_uploader_task(
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
tenant_manager: Arc<TenantShardManager>,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
command_queue: tokio::sync::mpsc::Receiver<CommandRequest<UploadCommand>>,
|
||||
background_jobs_can_start: Barrier,
|
||||
@@ -61,7 +61,7 @@ pub(super) async fn heatmap_uploader_task(
|
||||
/// handling loop and mutates it as needed: there are no locks here, because that event loop
|
||||
/// can hold &mut references to this type throughout.
|
||||
struct HeatmapUploader {
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
tenant_manager: Arc<TenantShardManager>,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
cancel: CancellationToken,
|
||||
|
||||
@@ -79,7 +79,7 @@ impl RunningJob for WriteInProgress {
|
||||
}
|
||||
|
||||
struct UploadPending {
|
||||
tenant: Arc<Tenant>,
|
||||
tenant: Arc<TenantShard>,
|
||||
last_upload: Option<LastUploadState>,
|
||||
target_time: Option<Instant>,
|
||||
period: Option<Duration>,
|
||||
@@ -111,7 +111,7 @@ impl scheduler::Completion for WriteComplete {
|
||||
struct UploaderTenantState {
|
||||
// This Weak only exists to enable culling idle instances of this type
|
||||
// when the Tenant has been deallocated.
|
||||
tenant: Weak<Tenant>,
|
||||
tenant: Weak<TenantShard>,
|
||||
|
||||
/// Digest of the serialized heatmap that we last successfully uploaded
|
||||
last_upload_state: Option<LastUploadState>,
|
||||
@@ -362,7 +362,7 @@ struct LastUploadState {
|
||||
/// of the object we would have uploaded.
|
||||
async fn upload_tenant_heatmap(
|
||||
remote_storage: GenericRemoteStorage,
|
||||
tenant: &Arc<Tenant>,
|
||||
tenant: &Arc<TenantShard>,
|
||||
last_upload: Option<LastUploadState>,
|
||||
) -> Result<UploadHeatmapOutcome, UploadHeatmapError> {
|
||||
debug_assert_current_span_has_tenant_id();
|
||||
|
||||
@@ -361,7 +361,7 @@ where
|
||||
|
||||
/// Periodic execution phase: inspect all attached tenants and schedule any work they require.
|
||||
///
|
||||
/// The type in `tenants` should be a tenant-like structure, e.g. [`crate::tenant::Tenant`] or [`crate::tenant::secondary::SecondaryTenant`]
|
||||
/// The type in `tenants` should be a tenant-like structure, e.g. [`crate::tenant::TenantShard`] or [`crate::tenant::secondary::SecondaryTenant`]
|
||||
///
|
||||
/// This function resets the pending list: it is assumed that the caller may change their mind about
|
||||
/// which tenants need work between calls to schedule_iteration.
|
||||
|
||||
@@ -11,7 +11,7 @@ use tokio_util::sync::CancellationToken;
|
||||
use crate::context::RequestContext;
|
||||
use crate::pgdatadir_mapping::CalculateLogicalSizeError;
|
||||
|
||||
use super::{GcError, LogicalSizeCalculationCause, Tenant};
|
||||
use super::{GcError, LogicalSizeCalculationCause, TenantShard};
|
||||
use crate::tenant::{MaybeOffloaded, Timeline};
|
||||
use utils::id::TimelineId;
|
||||
use utils::lsn::Lsn;
|
||||
@@ -159,7 +159,7 @@ pub struct TimelineInputs {
|
||||
/// initdb_lsn branchpoints* next_pitr_cutoff latest
|
||||
/// ```
|
||||
pub(super) async fn gather_inputs(
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
limit: &Arc<Semaphore>,
|
||||
max_retention_period: Option<u64>,
|
||||
logical_size_cache: &mut HashMap<(TimelineId, Lsn), u64>,
|
||||
|
||||
@@ -10,26 +10,18 @@ mod layer_desc;
|
||||
mod layer_name;
|
||||
pub mod merge_iterator;
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
use crate::context::{AccessStatsBehavior, RequestContext};
|
||||
use bytes::Bytes;
|
||||
use futures::stream::FuturesUnordered;
|
||||
use futures::StreamExt;
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum};
|
||||
use pageserver_api::record::NeonWalRecord;
|
||||
use pageserver_api::value::Value;
|
||||
use std::cmp::Ordering;
|
||||
use std::cmp::{Ordering, Reverse};
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::{BinaryHeap, HashMap};
|
||||
use std::future::Future;
|
||||
use std::ops::Range;
|
||||
use std::pin::Pin;
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||
use tracing::{trace, Instrument};
|
||||
use utils::sync::gate::GateGuard;
|
||||
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
@@ -86,151 +78,30 @@ pub(crate) enum ValueReconstructSituation {
|
||||
Continue,
|
||||
}
|
||||
|
||||
/// On disk representation of a value loaded in a buffer
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum OnDiskValue {
|
||||
/// Unencoded [`Value::Image`]
|
||||
RawImage(Bytes),
|
||||
/// Encoded [`Value`]. Can deserialize into an image or a WAL record
|
||||
WalRecordOrImage(Bytes),
|
||||
}
|
||||
|
||||
/// Reconstruct data accumulated for a single key during a vectored get
|
||||
#[derive(Debug, Default)]
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub(crate) struct VectoredValueReconstructState {
|
||||
pub(crate) on_disk_values: Vec<(Lsn, OnDiskValueIoWaiter)>,
|
||||
pub(crate) records: Vec<(Lsn, NeonWalRecord)>,
|
||||
pub(crate) img: Option<(Lsn, Bytes)>,
|
||||
|
||||
pub(crate) situation: ValueReconstructSituation,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct OnDiskValueIoWaiter {
|
||||
rx: tokio::sync::oneshot::Receiver<OnDiskValueIoResult>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[must_use]
|
||||
pub(crate) enum OnDiskValueIo {
|
||||
/// Traversal identified this IO as required to complete the vectored get.
|
||||
Required {
|
||||
num_active_ios: Arc<AtomicUsize>,
|
||||
tx: tokio::sync::oneshot::Sender<OnDiskValueIoResult>,
|
||||
},
|
||||
/// Sparse keyspace reads always read all the values for a given key,
|
||||
/// even though only the first value is needed.
|
||||
///
|
||||
/// This variant represents the unnecessary IOs for those values at lower LSNs
|
||||
/// that aren't needed, but are currently still being done.
|
||||
///
|
||||
/// The execution of unnecessary IOs was a pre-existing behavior before concurrent IO.
|
||||
/// We added this explicit representation here so that we can drop
|
||||
/// unnecessary IO results immediately, instead of buffering them in
|
||||
/// `oneshot` channels inside [`VectoredValueReconstructState`] until
|
||||
/// [`VectoredValueReconstructState::collect_pending_ios`] gets called.
|
||||
Unnecessary,
|
||||
}
|
||||
|
||||
type OnDiskValueIoResult = Result<OnDiskValue, std::io::Error>;
|
||||
|
||||
impl OnDiskValueIo {
|
||||
pub(crate) fn complete(self, res: OnDiskValueIoResult) {
|
||||
match self {
|
||||
OnDiskValueIo::Required { num_active_ios, tx } => {
|
||||
num_active_ios.fetch_sub(1, std::sync::atomic::Ordering::Release);
|
||||
let _ = tx.send(res);
|
||||
}
|
||||
OnDiskValueIo::Unnecessary => {
|
||||
// Nobody cared, see variant doc comment.
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub(crate) enum WaitCompletionError {
|
||||
#[error("OnDiskValueIo was dropped without completing, likely the sidecar task panicked")]
|
||||
IoDropped,
|
||||
}
|
||||
|
||||
impl OnDiskValueIoWaiter {
|
||||
pub(crate) async fn wait_completion(self) -> Result<OnDiskValueIoResult, WaitCompletionError> {
|
||||
// NB: for Unnecessary IOs, this method never gets called because we don't add them to `on_disk_values`.
|
||||
self.rx.await.map_err(|_| WaitCompletionError::IoDropped)
|
||||
}
|
||||
situation: ValueReconstructSituation,
|
||||
}
|
||||
|
||||
impl VectoredValueReconstructState {
|
||||
/// # Cancel-Safety
|
||||
///
|
||||
/// Technically fine to stop polling this future, but, the IOs will still
|
||||
/// be executed to completion by the sidecar task and hold on to / consume resources.
|
||||
/// Better not do it to make reasonsing about the system easier.
|
||||
pub(crate) async fn collect_pending_ios(
|
||||
self,
|
||||
) -> Result<ValueReconstructState, PageReconstructError> {
|
||||
use utils::bin_ser::BeSer;
|
||||
fn get_cached_lsn(&self) -> Option<Lsn> {
|
||||
self.img.as_ref().map(|img| img.0)
|
||||
}
|
||||
}
|
||||
|
||||
let mut res = Ok(ValueReconstructState::default());
|
||||
impl From<VectoredValueReconstructState> for ValueReconstructState {
|
||||
fn from(mut state: VectoredValueReconstructState) -> Self {
|
||||
// walredo expects the records to be descending in terms of Lsn
|
||||
state.records.sort_by_key(|(lsn, _)| Reverse(*lsn));
|
||||
|
||||
// We should try hard not to bail early, so that by the time we return from this
|
||||
// function, all IO for this value is done. It's not required -- we could totally
|
||||
// stop polling the IO futures in the sidecar task, they need to support that,
|
||||
// but just stopping to poll doesn't reduce the IO load on the disk. It's easier
|
||||
// to reason about the system if we just wait for all IO to complete, even if
|
||||
// we're no longer interested in the result.
|
||||
//
|
||||
// Revisit this when IO futures are replaced with a more sophisticated IO system
|
||||
// and an IO scheduler, where we know which IOs were submitted and which ones
|
||||
// just queued. Cf the comment on IoConcurrency::spawn_io.
|
||||
for (lsn, waiter) in self.on_disk_values {
|
||||
let value_recv_res = waiter
|
||||
.wait_completion()
|
||||
// we rely on the caller to poll us to completion, so this is not a bail point
|
||||
.await;
|
||||
// Force not bailing early by wrapping the code into a closure.
|
||||
#[allow(clippy::redundant_closure_call)]
|
||||
let _: () = (|| {
|
||||
match (&mut res, value_recv_res) {
|
||||
(Err(_), _) => {
|
||||
// We've already failed, no need to process more.
|
||||
}
|
||||
(Ok(_), Err(wait_err)) => {
|
||||
// This shouldn't happen - likely the sidecar task panicked.
|
||||
res = Err(PageReconstructError::Other(wait_err.into()));
|
||||
}
|
||||
(Ok(_), Ok(Err(err))) => {
|
||||
let err: std::io::Error = err;
|
||||
// TODO: returning IO error here will fail a compute query.
|
||||
// Probably not what we want, we're not doing `maybe_fatal_err`
|
||||
// in the IO futures.
|
||||
// But it's been like that for a long time, not changing it
|
||||
// as part of concurrent IO.
|
||||
// => https://github.com/neondatabase/neon/issues/10454
|
||||
res = Err(PageReconstructError::Other(err.into()));
|
||||
}
|
||||
(Ok(ok), Ok(Ok(OnDiskValue::RawImage(img)))) => {
|
||||
assert!(ok.img.is_none());
|
||||
ok.img = Some((lsn, img));
|
||||
}
|
||||
(Ok(ok), Ok(Ok(OnDiskValue::WalRecordOrImage(buf)))) => {
|
||||
match Value::des(&buf) {
|
||||
Ok(Value::WalRecord(rec)) => {
|
||||
ok.records.push((lsn, rec));
|
||||
}
|
||||
Ok(Value::Image(img)) => {
|
||||
assert!(ok.img.is_none());
|
||||
ok.img = Some((lsn, img));
|
||||
}
|
||||
Err(err) => {
|
||||
res = Err(PageReconstructError::Other(err.into()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})();
|
||||
ValueReconstructState {
|
||||
records: state.records,
|
||||
img: state.img,
|
||||
}
|
||||
|
||||
res
|
||||
}
|
||||
}
|
||||
|
||||
@@ -238,7 +109,7 @@ impl VectoredValueReconstructState {
|
||||
pub(crate) struct ValuesReconstructState {
|
||||
/// The keys will be removed after `get_vectored` completes. The caller outside `Timeline`
|
||||
/// should not expect to get anything from this hashmap.
|
||||
pub(crate) keys: HashMap<Key, VectoredValueReconstructState>,
|
||||
pub(crate) keys: HashMap<Key, Result<VectoredValueReconstructState, PageReconstructError>>,
|
||||
/// The keys which are already retrieved
|
||||
keys_done: KeySpaceRandomAccum,
|
||||
|
||||
@@ -248,365 +119,27 @@ pub(crate) struct ValuesReconstructState {
|
||||
// Statistics that are still accessible as a caller of `get_vectored_impl`.
|
||||
layers_visited: u32,
|
||||
delta_layers_visited: u32,
|
||||
|
||||
pub(crate) io_concurrency: IoConcurrency,
|
||||
num_active_ios: Arc<AtomicUsize>,
|
||||
}
|
||||
|
||||
/// The level of IO concurrency to be used on the read path
|
||||
///
|
||||
/// The desired end state is that we always do parallel IO.
|
||||
/// This struct and the dispatching in the impl will be removed once
|
||||
/// we've built enough confidence.
|
||||
pub(crate) enum IoConcurrency {
|
||||
Sequential,
|
||||
SidecarTask {
|
||||
task_id: usize,
|
||||
ios_tx: tokio::sync::mpsc::UnboundedSender<IoFuture>,
|
||||
},
|
||||
}
|
||||
|
||||
type IoFuture = Pin<Box<dyn Send + Future<Output = ()>>>;
|
||||
|
||||
pub(crate) enum SelectedIoConcurrency {
|
||||
Sequential,
|
||||
SidecarTask(GateGuard),
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for IoConcurrency {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
IoConcurrency::Sequential => write!(f, "Sequential"),
|
||||
IoConcurrency::SidecarTask { .. } => write!(f, "SidecarTask"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for SelectedIoConcurrency {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
SelectedIoConcurrency::Sequential => write!(f, "Sequential"),
|
||||
SelectedIoConcurrency::SidecarTask(_) => write!(f, "SidecarTask"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IoConcurrency {
|
||||
/// Force sequential IO. This is a temporary workaround until we have
|
||||
/// moved plumbing-through-the-call-stack
|
||||
/// of IoConcurrency into `RequestContextq.
|
||||
///
|
||||
/// DO NOT USE for new code.
|
||||
///
|
||||
/// Tracking issue: <https://github.com/neondatabase/neon/issues/10460>.
|
||||
pub(crate) fn sequential() -> Self {
|
||||
Self::spawn(SelectedIoConcurrency::Sequential)
|
||||
}
|
||||
|
||||
pub(crate) fn spawn_from_conf(
|
||||
conf: &'static PageServerConf,
|
||||
gate_guard: GateGuard,
|
||||
) -> IoConcurrency {
|
||||
use pageserver_api::config::GetVectoredConcurrentIo;
|
||||
let selected = match conf.get_vectored_concurrent_io {
|
||||
GetVectoredConcurrentIo::Sequential => SelectedIoConcurrency::Sequential,
|
||||
GetVectoredConcurrentIo::SidecarTask => SelectedIoConcurrency::SidecarTask(gate_guard),
|
||||
};
|
||||
Self::spawn(selected)
|
||||
}
|
||||
|
||||
pub(crate) fn spawn(io_concurrency: SelectedIoConcurrency) -> Self {
|
||||
match io_concurrency {
|
||||
SelectedIoConcurrency::Sequential => IoConcurrency::Sequential,
|
||||
SelectedIoConcurrency::SidecarTask(gate_guard) => {
|
||||
let (ios_tx, ios_rx) = tokio::sync::mpsc::unbounded_channel();
|
||||
static TASK_ID: AtomicUsize = AtomicUsize::new(0);
|
||||
let task_id = TASK_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
|
||||
// TODO: enrich the span with more context (tenant,shard,timeline) + (basebackup|pagestream|...)
|
||||
let span =
|
||||
tracing::info_span!(parent: None, "IoConcurrency_sidecar", task_id = task_id);
|
||||
trace!(task_id, "spawning sidecar task");
|
||||
tokio::spawn(async move {
|
||||
trace!("start");
|
||||
scopeguard::defer!{ trace!("end") };
|
||||
type IosRx = tokio::sync::mpsc::UnboundedReceiver<IoFuture>;
|
||||
enum State {
|
||||
Waiting {
|
||||
// invariant: is_empty(), but we recycle the allocation
|
||||
empty_futures: FuturesUnordered<IoFuture>,
|
||||
ios_rx: IosRx,
|
||||
},
|
||||
Executing {
|
||||
futures: FuturesUnordered<IoFuture>,
|
||||
ios_rx: IosRx,
|
||||
},
|
||||
ShuttingDown {
|
||||
futures: FuturesUnordered<IoFuture>,
|
||||
},
|
||||
}
|
||||
let mut state = State::Waiting {
|
||||
empty_futures: FuturesUnordered::new(),
|
||||
ios_rx,
|
||||
};
|
||||
loop {
|
||||
match state {
|
||||
State::Waiting {
|
||||
empty_futures,
|
||||
mut ios_rx,
|
||||
} => {
|
||||
assert!(empty_futures.is_empty());
|
||||
tokio::select! {
|
||||
fut = ios_rx.recv() => {
|
||||
if let Some(fut) = fut {
|
||||
trace!("received new io future");
|
||||
empty_futures.push(fut);
|
||||
state = State::Executing { futures: empty_futures, ios_rx };
|
||||
} else {
|
||||
state = State::ShuttingDown { futures: empty_futures }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
State::Executing {
|
||||
mut futures,
|
||||
mut ios_rx,
|
||||
} => {
|
||||
tokio::select! {
|
||||
res = futures.next() => {
|
||||
trace!("io future completed");
|
||||
assert!(res.is_some());
|
||||
if futures.is_empty() {
|
||||
state = State::Waiting { empty_futures: futures, ios_rx};
|
||||
} else {
|
||||
state = State::Executing { futures, ios_rx };
|
||||
}
|
||||
}
|
||||
fut = ios_rx.recv() => {
|
||||
if let Some(fut) = fut {
|
||||
trace!("received new io future");
|
||||
futures.push(fut);
|
||||
state = State::Executing { futures, ios_rx};
|
||||
} else {
|
||||
state = State::ShuttingDown { futures };
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
State::ShuttingDown {
|
||||
mut futures,
|
||||
} => {
|
||||
trace!("shutting down");
|
||||
while let Some(()) = futures.next().await {
|
||||
trace!("io future completed (shutdown)");
|
||||
// drain
|
||||
}
|
||||
trace!("shutdown complete");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
drop(gate_guard); // drop it right before we exit
|
||||
}.instrument(span));
|
||||
IoConcurrency::SidecarTask { task_id, ios_tx }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn clone(&self) -> Self {
|
||||
match self {
|
||||
IoConcurrency::Sequential => IoConcurrency::Sequential,
|
||||
IoConcurrency::SidecarTask { task_id, ios_tx } => IoConcurrency::SidecarTask {
|
||||
task_id: *task_id,
|
||||
ios_tx: ios_tx.clone(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Submit an IO to be executed in the background. DEADLOCK RISK, read the full doc string.
|
||||
///
|
||||
/// The IO is represented as an opaque future.
|
||||
/// IO completion must be handled inside the future, e.g., through a oneshot channel.
|
||||
///
|
||||
/// The API seems simple but there are multiple **pitfalls** involving
|
||||
/// DEADLOCK RISK.
|
||||
///
|
||||
/// First, there are no guarantees about the exexecution of the IO.
|
||||
/// It may be `await`ed in-place before this function returns.
|
||||
/// It may be polled partially by this task and handed off to another task to be finished.
|
||||
/// It may be polled and then dropped before returning ready.
|
||||
///
|
||||
/// This means that submitted IOs must not be interedependent.
|
||||
/// Interdependence may be through shared limited resources, e.g.,
|
||||
/// - VirtualFile file descriptor cache slot acquisition
|
||||
/// - tokio-epoll-uring slot
|
||||
///
|
||||
/// # Why current usage is safe from deadlocks
|
||||
///
|
||||
/// Textbook condition for a deadlock is that _all_ of the following be given
|
||||
/// - Mutual exclusion
|
||||
/// - Hold and wait
|
||||
/// - No preemption
|
||||
/// - Circular wait
|
||||
///
|
||||
/// The current usage is safe because:
|
||||
/// - Mutual exclusion: IO futures definitely use mutexes, no way around that for now
|
||||
/// - Hold and wait: IO futures currently hold two kinds of locks/resources while waiting
|
||||
/// for acquisition of other resources:
|
||||
/// - VirtualFile file descriptor cache slot tokio mutex
|
||||
/// - tokio-epoll-uring slot (uses tokio notify => wait queue, much like mutex)
|
||||
/// - No preemption: there's no taking-away of acquired locks/resources => given
|
||||
/// - Circular wait: this is the part of the condition that isn't met: all IO futures
|
||||
/// first acquire VirtualFile mutex, then tokio-epoll-uring slot.
|
||||
/// There is no IO future that acquires slot before VirtualFile.
|
||||
/// Hence there can be no circular waiting.
|
||||
/// Hence there cannot be a deadlock.
|
||||
///
|
||||
/// This is a very fragile situation and must be revisited whenver any code called from
|
||||
/// inside the IO futures is changed.
|
||||
///
|
||||
/// We will move away from opaque IO futures towards well-defined IOs at some point in
|
||||
/// the future when we have shipped this first version of concurrent IO to production
|
||||
/// and are ready to retire the Sequential mode which runs the futures in place.
|
||||
/// Right now, while brittle, the opaque IO approach allows us to ship the feature
|
||||
/// with minimal changes to the code and minimal changes to existing behavior in Sequential mode.
|
||||
///
|
||||
/// Also read the comment in `collect_pending_ios`.
|
||||
pub(crate) async fn spawn_io<F>(&mut self, fut: F)
|
||||
where
|
||||
F: std::future::Future<Output = ()> + Send + 'static,
|
||||
{
|
||||
match self {
|
||||
IoConcurrency::Sequential => fut.await,
|
||||
IoConcurrency::SidecarTask { ios_tx, .. } => {
|
||||
let fut = Box::pin(fut);
|
||||
// NB: experiments showed that doing an opportunistic poll of `fut` here was bad for throughput
|
||||
// while insignificant for latency.
|
||||
// It would make sense to revisit the tokio-epoll-uring API in the future such that we can try
|
||||
// a submission here, but never poll the future. That way, io_uring can make proccess while
|
||||
// the future sits in the ios_tx queue.
|
||||
match ios_tx.send(fut) {
|
||||
Ok(()) => {}
|
||||
Err(_) => {
|
||||
unreachable!("the io task must have exited, likely it panicked")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn spawn_for_test() -> impl std::ops::DerefMut<Target = Self> {
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use tracing::info;
|
||||
use utils::sync::gate::Gate;
|
||||
|
||||
// Spawn needs a Gate, give it one.
|
||||
struct Wrapper {
|
||||
inner: IoConcurrency,
|
||||
#[allow(dead_code)]
|
||||
gate: Box<Gate>,
|
||||
}
|
||||
impl Deref for Wrapper {
|
||||
type Target = IoConcurrency;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.inner
|
||||
}
|
||||
}
|
||||
impl DerefMut for Wrapper {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.inner
|
||||
}
|
||||
}
|
||||
let gate = Box::new(Gate::default());
|
||||
|
||||
// The default behavior when running Rust unit tests without any further
|
||||
// flags is to use the new behavior.
|
||||
// The CI uses the following environment variable to unit test both old
|
||||
// and new behavior.
|
||||
// NB: the Python regression & perf tests take the `else` branch
|
||||
// below and have their own defaults management.
|
||||
let selected = {
|
||||
// The pageserver_api::config type is unsuitable because it's internally tagged.
|
||||
#[derive(serde::Deserialize)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
enum TestOverride {
|
||||
Sequential,
|
||||
SidecarTask,
|
||||
}
|
||||
use once_cell::sync::Lazy;
|
||||
static TEST_OVERRIDE: Lazy<TestOverride> = Lazy::new(|| {
|
||||
utils::env::var_serde_json_string(
|
||||
"NEON_PAGESERVER_UNIT_TEST_GET_VECTORED_CONCURRENT_IO",
|
||||
)
|
||||
.unwrap_or(TestOverride::SidecarTask)
|
||||
});
|
||||
|
||||
match *TEST_OVERRIDE {
|
||||
TestOverride::Sequential => SelectedIoConcurrency::Sequential,
|
||||
TestOverride::SidecarTask => {
|
||||
SelectedIoConcurrency::SidecarTask(gate.enter().expect("just created it"))
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
info!(?selected, "get_vectored_concurrent_io test");
|
||||
|
||||
Wrapper {
|
||||
inner: Self::spawn(selected),
|
||||
gate,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Make noise in case the [`ValuesReconstructState`] gets dropped while
|
||||
/// there are still IOs in flight.
|
||||
/// Refer to `collect_pending_ios` for why we prefer not to do that.
|
||||
//
|
||||
/// We log from here instead of from the sidecar task because the [`ValuesReconstructState`]
|
||||
/// gets dropped in a tracing span with more context.
|
||||
/// We repeat the sidecar tasks's `task_id` so we can correlate what we emit here with
|
||||
/// the logs / panic handler logs from the sidecar task, which also logs the `task_id`.
|
||||
impl Drop for ValuesReconstructState {
|
||||
fn drop(&mut self) {
|
||||
let num_active_ios = self
|
||||
.num_active_ios
|
||||
.load(std::sync::atomic::Ordering::Acquire);
|
||||
if num_active_ios == 0 {
|
||||
return;
|
||||
}
|
||||
let sidecar_task_id = match &self.io_concurrency {
|
||||
IoConcurrency::Sequential => None,
|
||||
IoConcurrency::SidecarTask { task_id, .. } => Some(*task_id),
|
||||
};
|
||||
tracing::warn!(
|
||||
num_active_ios,
|
||||
?sidecar_task_id,
|
||||
backtrace=%std::backtrace::Backtrace::force_capture(),
|
||||
"dropping ValuesReconstructState while some IOs have not been completed",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
impl ValuesReconstructState {
|
||||
pub(crate) fn new(io_concurrency: IoConcurrency) -> Self {
|
||||
pub(crate) fn new() -> Self {
|
||||
Self {
|
||||
keys: HashMap::new(),
|
||||
keys_done: KeySpaceRandomAccum::new(),
|
||||
keys_with_image_coverage: None,
|
||||
layers_visited: 0,
|
||||
delta_layers_visited: 0,
|
||||
io_concurrency,
|
||||
num_active_ios: Arc::new(AtomicUsize::new(0)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Absolutely read [`IoConcurrency::spawn_io`] to learn about assumptions & pitfalls.
|
||||
pub(crate) async fn spawn_io<F>(&mut self, fut: F)
|
||||
where
|
||||
F: std::future::Future<Output = ()> + Send + 'static,
|
||||
{
|
||||
self.io_concurrency.spawn_io(fut).await;
|
||||
/// Associate a key with the error which it encountered and mark it as done
|
||||
pub(crate) fn on_key_error(&mut self, key: Key, err: PageReconstructError) {
|
||||
let previous = self.keys.insert(key, Err(err));
|
||||
if let Some(Ok(state)) = previous {
|
||||
if state.situation == ValueReconstructSituation::Continue {
|
||||
self.keys_done.add_key(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn on_layer_visited(&mut self, layer: &ReadableLayer) {
|
||||
@@ -626,6 +159,29 @@ impl ValuesReconstructState {
|
||||
self.layers_visited
|
||||
}
|
||||
|
||||
/// This function is called after reading a keyspace from a layer.
|
||||
/// It checks if the read path has now moved past the cached Lsn for any keys.
|
||||
///
|
||||
/// Implementation note: We intentionally iterate over the keys for which we've
|
||||
/// already collected some reconstruct data. This avoids scaling complexity with
|
||||
/// the size of the search space.
|
||||
pub(crate) fn on_lsn_advanced(&mut self, keyspace: &KeySpace, advanced_to: Lsn) {
|
||||
for (key, value) in self.keys.iter_mut() {
|
||||
if !keyspace.contains(key) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Ok(state) = value {
|
||||
if state.situation != ValueReconstructSituation::Complete
|
||||
&& state.get_cached_lsn() >= Some(advanced_to)
|
||||
{
|
||||
state.situation = ValueReconstructSituation::Complete;
|
||||
self.keys_done.add_key(*key);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// On hitting image layer, we can mark all keys in this range as done, because
|
||||
/// if the image layer does not contain a key, it is deleted/never added.
|
||||
pub(crate) fn on_image_layer_visited(&mut self, key_range: &Range<Key>) {
|
||||
@@ -643,42 +199,70 @@ impl ValuesReconstructState {
|
||||
///
|
||||
/// If the key is in the sparse keyspace (i.e., aux files), we do not track them in
|
||||
/// `key_done`.
|
||||
// TODO: rename this method & update description.
|
||||
pub(crate) fn update_key(&mut self, key: &Key, lsn: Lsn, completes: bool) -> OnDiskValueIo {
|
||||
let state = self.keys.entry(*key).or_default();
|
||||
|
||||
pub(crate) fn update_key(
|
||||
&mut self,
|
||||
key: &Key,
|
||||
lsn: Lsn,
|
||||
value: Value,
|
||||
) -> ValueReconstructSituation {
|
||||
let state = self
|
||||
.keys
|
||||
.entry(*key)
|
||||
.or_insert(Ok(VectoredValueReconstructState::default()));
|
||||
let is_sparse_key = key.is_sparse();
|
||||
if let Ok(state) = state {
|
||||
let key_done = match state.situation {
|
||||
ValueReconstructSituation::Complete => {
|
||||
if is_sparse_key {
|
||||
// Sparse keyspace might be visited multiple times because
|
||||
// we don't track unmapped keyspaces.
|
||||
return ValueReconstructSituation::Complete;
|
||||
} else {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
ValueReconstructSituation::Continue => match value {
|
||||
Value::Image(img) => {
|
||||
state.img = Some((lsn, img));
|
||||
true
|
||||
}
|
||||
Value::WalRecord(rec) => {
|
||||
debug_assert!(
|
||||
Some(lsn) > state.get_cached_lsn(),
|
||||
"Attempt to collect a record below cached LSN for walredo: {} < {}",
|
||||
lsn,
|
||||
state
|
||||
.get_cached_lsn()
|
||||
.expect("Assertion can only fire if a cached lsn is present")
|
||||
);
|
||||
|
||||
let required_io = match state.situation {
|
||||
ValueReconstructSituation::Complete => {
|
||||
if is_sparse_key {
|
||||
// Sparse keyspace might be visited multiple times because
|
||||
// we don't track unmapped keyspaces.
|
||||
return OnDiskValueIo::Unnecessary;
|
||||
} else {
|
||||
unreachable!()
|
||||
let will_init = rec.will_init();
|
||||
state.records.push((lsn, rec));
|
||||
will_init
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
if key_done && state.situation == ValueReconstructSituation::Continue {
|
||||
state.situation = ValueReconstructSituation::Complete;
|
||||
if !is_sparse_key {
|
||||
self.keys_done.add_key(*key);
|
||||
}
|
||||
}
|
||||
ValueReconstructSituation::Continue => {
|
||||
self.num_active_ios
|
||||
.fetch_add(1, std::sync::atomic::Ordering::Release);
|
||||
let (tx, rx) = tokio::sync::oneshot::channel();
|
||||
state.on_disk_values.push((lsn, OnDiskValueIoWaiter { rx }));
|
||||
OnDiskValueIo::Required {
|
||||
tx,
|
||||
num_active_ios: Arc::clone(&self.num_active_ios),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if completes && state.situation == ValueReconstructSituation::Continue {
|
||||
state.situation = ValueReconstructSituation::Complete;
|
||||
if !is_sparse_key {
|
||||
self.keys_done.add_key(*key);
|
||||
}
|
||||
state.situation
|
||||
} else {
|
||||
ValueReconstructSituation::Complete
|
||||
}
|
||||
}
|
||||
|
||||
required_io
|
||||
/// Returns the Lsn at which this key is cached if one exists.
|
||||
/// The read path should go no further than this Lsn for the given key.
|
||||
pub(crate) fn get_cached_lsn(&self, key: &Key) -> Option<Lsn> {
|
||||
self.keys
|
||||
.get(key)
|
||||
.and_then(|k| k.as_ref().ok())
|
||||
.and_then(|state| state.get_cached_lsn())
|
||||
}
|
||||
|
||||
/// Returns the key space describing the keys that have
|
||||
@@ -692,6 +276,12 @@ impl ValuesReconstructState {
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for ValuesReconstructState {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// A key that uniquely identifies a layer in a timeline
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
|
||||
pub(crate) enum LayerId {
|
||||
@@ -1130,78 +720,3 @@ impl<T: std::fmt::Display> std::fmt::Debug for RangeDisplayDebug<'_, T> {
|
||||
write!(f, "{}..{}", self.0.start, self.0.end)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests2 {
|
||||
use pageserver_api::key::DBDIR_KEY;
|
||||
use tracing::info;
|
||||
|
||||
use super::*;
|
||||
use crate::tenant::storage_layer::IoConcurrency;
|
||||
|
||||
/// TODO: currently this test relies on manual visual inspection of the --no-capture output.
|
||||
/// Should look like so:
|
||||
/// ```text
|
||||
/// RUST_LOG=trace cargo nextest run --features testing --no-capture test_io_concurrency_noise
|
||||
/// running 1 test
|
||||
/// 2025-01-21T17:42:01.335679Z INFO get_vectored_concurrent_io test selected=SidecarTask
|
||||
/// 2025-01-21T17:42:01.335680Z TRACE spawning sidecar task task_id=0
|
||||
/// 2025-01-21T17:42:01.335937Z TRACE IoConcurrency_sidecar{task_id=0}: start
|
||||
/// 2025-01-21T17:42:01.335972Z TRACE IoConcurrency_sidecar{task_id=0}: received new io future
|
||||
/// 2025-01-21T17:42:01.335999Z INFO IoConcurrency_sidecar{task_id=0}: waiting for signal to complete IO
|
||||
/// 2025-01-21T17:42:01.336229Z WARN dropping ValuesReconstructState while some IOs have not been completed num_active_ios=1 sidecar_task_id=Some(0) backtrace= 0: <pageserver::tenant::storage_layer::ValuesReconstructState as core::ops::drop::Drop>::drop
|
||||
/// at ./src/tenant/storage_layer.rs:553:24
|
||||
/// 1: core::ptr::drop_in_place<pageserver::tenant::storage_layer::ValuesReconstructState>
|
||||
/// at /home/christian/.rustup/toolchains/1.84.0-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ptr/mod.rs:521:1
|
||||
/// 2: core::mem::drop
|
||||
/// at /home/christian/.rustup/toolchains/1.84.0-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/mem/mod.rs:942:24
|
||||
/// 3: pageserver::tenant::storage_layer::tests2::test_io_concurrency_noise::{{closure}}
|
||||
/// at ./src/tenant/storage_layer.rs:1159:9
|
||||
/// ...
|
||||
/// 49: <unknown>
|
||||
/// 2025-01-21T17:42:01.452293Z INFO IoConcurrency_sidecar{task_id=0}: completing IO
|
||||
/// 2025-01-21T17:42:01.452357Z TRACE IoConcurrency_sidecar{task_id=0}: io future completed
|
||||
/// 2025-01-21T17:42:01.452473Z TRACE IoConcurrency_sidecar{task_id=0}: end
|
||||
/// test tenant::storage_layer::tests2::test_io_concurrency_noise ... ok
|
||||
///
|
||||
/// ```
|
||||
#[tokio::test]
|
||||
async fn test_io_concurrency_noise() {
|
||||
crate::tenant::harness::setup_logging();
|
||||
|
||||
let io_concurrency = IoConcurrency::spawn_for_test();
|
||||
match *io_concurrency {
|
||||
IoConcurrency::Sequential => {
|
||||
// This test asserts behavior in sidecar mode, doesn't make sense in sequential mode.
|
||||
return;
|
||||
}
|
||||
IoConcurrency::SidecarTask { .. } => {}
|
||||
}
|
||||
let mut reconstruct_state = ValuesReconstructState::new(io_concurrency.clone());
|
||||
|
||||
let (io_fut_is_waiting_tx, io_fut_is_waiting) = tokio::sync::oneshot::channel();
|
||||
let (do_complete_io, should_complete_io) = tokio::sync::oneshot::channel();
|
||||
let (io_fut_exiting_tx, io_fut_exiting) = tokio::sync::oneshot::channel();
|
||||
|
||||
let io = reconstruct_state.update_key(&DBDIR_KEY, Lsn(8), true);
|
||||
reconstruct_state
|
||||
.spawn_io(async move {
|
||||
info!("waiting for signal to complete IO");
|
||||
io_fut_is_waiting_tx.send(()).unwrap();
|
||||
should_complete_io.await.unwrap();
|
||||
info!("completing IO");
|
||||
io.complete(Ok(OnDiskValue::RawImage(Bytes::new())));
|
||||
io_fut_exiting_tx.send(()).unwrap();
|
||||
})
|
||||
.await;
|
||||
|
||||
io_fut_is_waiting.await.unwrap();
|
||||
|
||||
// this is what makes the noise
|
||||
drop(reconstruct_state);
|
||||
|
||||
do_complete_io.send(()).unwrap();
|
||||
|
||||
io_fut_exiting.await.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -404,7 +404,7 @@ mod tests {
|
||||
|
||||
use crate::{
|
||||
tenant::{
|
||||
harness::{TenantHarness, TIMELINE_ID},
|
||||
harness::{TenantShardHarness, TIMELINE_ID},
|
||||
storage_layer::AsLayerDesc,
|
||||
},
|
||||
DEFAULT_PG_VERSION,
|
||||
@@ -431,7 +431,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn write_one_image() {
|
||||
let harness = TenantHarness::create("split_writer_write_one_image")
|
||||
let harness = TenantShardHarness::create("split_writer_write_one_image")
|
||||
.await
|
||||
.unwrap();
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
@@ -510,7 +510,7 @@ mod tests {
|
||||
/// Test the image+delta writer by writing a large number of images and deltas. If discard is
|
||||
/// set to true, all layers will be discarded.
|
||||
async fn write_split_helper(harness_name: &'static str, discard: bool) {
|
||||
let harness = TenantHarness::create(harness_name).await.unwrap();
|
||||
let harness = TenantShardHarness::create(harness_name).await.unwrap();
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
|
||||
let tline = tenant
|
||||
@@ -605,7 +605,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn write_large_img() {
|
||||
let harness = TenantHarness::create("split_writer_write_large_img")
|
||||
let harness = TenantShardHarness::create("split_writer_write_large_img")
|
||||
.await
|
||||
.unwrap();
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
@@ -692,7 +692,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn write_split_single_key() {
|
||||
let harness = TenantHarness::create("split_writer_write_split_single_key")
|
||||
let harness = TenantShardHarness::create("split_writer_write_split_single_key")
|
||||
.await
|
||||
.unwrap();
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
|
||||
@@ -41,12 +41,13 @@ use crate::tenant::vectored_blob_io::{
|
||||
BlobFlag, BufView, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead,
|
||||
VectoredReadPlanner,
|
||||
};
|
||||
use crate::tenant::PageReconstructError;
|
||||
use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt};
|
||||
use crate::virtual_file::IoBufferMut;
|
||||
use crate::virtual_file::{self, MaybeFatalIo, VirtualFile};
|
||||
use crate::TEMP_FILE_SUFFIX;
|
||||
use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION};
|
||||
use anyhow::{bail, ensure, Context, Result};
|
||||
use anyhow::{anyhow, bail, ensure, Context, Result};
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use futures::StreamExt;
|
||||
use itertools::Itertools;
|
||||
@@ -59,7 +60,7 @@ use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_api::value::Value;
|
||||
use rand::{distributions::Alphanumeric, Rng};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use std::collections::VecDeque;
|
||||
use std::fs::File;
|
||||
use std::io::SeekFrom;
|
||||
use std::ops::Range;
|
||||
@@ -76,10 +77,7 @@ use utils::{
|
||||
lsn::Lsn,
|
||||
};
|
||||
|
||||
use super::{
|
||||
AsLayerDesc, LayerName, OnDiskValue, OnDiskValueIo, PersistentLayerDesc, ResidentLayer,
|
||||
ValuesReconstructState,
|
||||
};
|
||||
use super::{AsLayerDesc, LayerName, PersistentLayerDesc, ValuesReconstructState};
|
||||
|
||||
///
|
||||
/// Header stored in the beginning of the file
|
||||
@@ -228,7 +226,7 @@ pub struct DeltaLayerInner {
|
||||
index_start_blk: u32,
|
||||
index_root_blk: u32,
|
||||
|
||||
file: Arc<VirtualFile>,
|
||||
file: VirtualFile,
|
||||
file_id: FileId,
|
||||
|
||||
layer_key_range: Range<Key>,
|
||||
@@ -797,11 +795,9 @@ impl DeltaLayerInner {
|
||||
max_vectored_read_bytes: Option<MaxVectoredReadBytes>,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Self> {
|
||||
let file = Arc::new(
|
||||
VirtualFile::open_v2(path, ctx)
|
||||
.await
|
||||
.context("open layer file")?,
|
||||
);
|
||||
let file = VirtualFile::open_v2(path, ctx)
|
||||
.await
|
||||
.context("open layer file")?;
|
||||
|
||||
let file_id = page_cache::next_file_id();
|
||||
|
||||
@@ -846,11 +842,12 @@ impl DeltaLayerInner {
|
||||
// Look up the keys in the provided keyspace and update
|
||||
// the reconstruct state with whatever is found.
|
||||
//
|
||||
// If the key is cached, go no further than the cached Lsn.
|
||||
//
|
||||
// Currently, the index is visited for each range, but this
|
||||
// can be further optimised to visit the index only once.
|
||||
pub(super) async fn get_values_reconstruct_data(
|
||||
&self,
|
||||
this: ResidentLayer,
|
||||
keyspace: KeySpace,
|
||||
lsn_range: Range<Lsn>,
|
||||
reconstruct_state: &mut ValuesReconstructState,
|
||||
@@ -878,14 +875,17 @@ impl DeltaLayerInner {
|
||||
data_end_offset,
|
||||
index_reader,
|
||||
planner,
|
||||
reconstruct_state,
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
.map_err(GetVectoredError::Other)?;
|
||||
|
||||
self.do_reads_and_update_state(this, reads, reconstruct_state, ctx)
|
||||
self.do_reads_and_update_state(reads, reconstruct_state, ctx)
|
||||
.await;
|
||||
|
||||
reconstruct_state.on_lsn_advanced(&keyspace, lsn_range.start);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -895,6 +895,7 @@ impl DeltaLayerInner {
|
||||
data_end_offset: u64,
|
||||
index_reader: DiskBtreeReader<Reader, DELTA_KEY_SIZE>,
|
||||
mut planner: VectoredReadPlanner,
|
||||
reconstruct_state: &mut ValuesReconstructState,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Vec<VectoredRead>>
|
||||
where
|
||||
@@ -921,9 +922,10 @@ impl DeltaLayerInner {
|
||||
assert!(key >= range.start);
|
||||
|
||||
let outside_lsn_range = !lsn_range.contains(&lsn);
|
||||
let below_cached_lsn = reconstruct_state.get_cached_lsn(&key) >= Some(lsn);
|
||||
|
||||
let flag = {
|
||||
if outside_lsn_range {
|
||||
if outside_lsn_range || below_cached_lsn {
|
||||
BlobFlag::Ignore
|
||||
} else if blob_ref.will_init() {
|
||||
BlobFlag::ReplaceAll
|
||||
@@ -992,78 +994,98 @@ impl DeltaLayerInner {
|
||||
|
||||
async fn do_reads_and_update_state(
|
||||
&self,
|
||||
this: ResidentLayer,
|
||||
reads: Vec<VectoredRead>,
|
||||
reconstruct_state: &mut ValuesReconstructState,
|
||||
ctx: &RequestContext,
|
||||
) {
|
||||
let vectored_blob_reader = VectoredBlobReader::new(&self.file);
|
||||
let mut ignore_key_with_err = None;
|
||||
|
||||
let max_vectored_read_bytes = self
|
||||
.max_vectored_read_bytes
|
||||
.expect("Layer is loaded with max vectored bytes config")
|
||||
.0
|
||||
.into();
|
||||
let buf_size = Self::get_min_read_buffer_size(&reads, max_vectored_read_bytes);
|
||||
let mut buf = Some(IoBufferMut::with_capacity(buf_size));
|
||||
|
||||
// Note that reads are processed in reverse order (from highest key+lsn).
|
||||
// This is the order that `ReconstructState` requires such that it can
|
||||
// track when a key is done.
|
||||
for read in reads.into_iter().rev() {
|
||||
let mut ios: HashMap<(Key, Lsn), OnDiskValueIo> = Default::default();
|
||||
for (_, blob_meta) in read.blobs_at.as_slice().iter().rev() {
|
||||
let io = reconstruct_state.update_key(
|
||||
&blob_meta.key,
|
||||
blob_meta.lsn,
|
||||
blob_meta.will_init,
|
||||
);
|
||||
ios.insert((blob_meta.key, blob_meta.lsn), io);
|
||||
}
|
||||
let res = vectored_blob_reader
|
||||
.read_blobs(&read, buf.take().expect("Should have a buffer"), ctx)
|
||||
.await;
|
||||
|
||||
let read_extend_residency = this.clone();
|
||||
let read_from = self.file.clone();
|
||||
let read_ctx = ctx.attached_child();
|
||||
reconstruct_state
|
||||
.spawn_io(async move {
|
||||
let vectored_blob_reader = VectoredBlobReader::new(&read_from);
|
||||
let buf = IoBufferMut::with_capacity(buf_size);
|
||||
|
||||
let res = vectored_blob_reader.read_blobs(&read, buf, &read_ctx).await;
|
||||
match res {
|
||||
Ok(blobs_buf) => {
|
||||
let view = BufView::new_slice(&blobs_buf.buf);
|
||||
for meta in blobs_buf.blobs.iter().rev() {
|
||||
let io = ios.remove(&(meta.meta.key, meta.meta.lsn)).unwrap();
|
||||
|
||||
let blob_read = meta.read(&view).await;
|
||||
let blob_read = match blob_read {
|
||||
Ok(buf) => buf,
|
||||
Err(e) => {
|
||||
io.complete(Err(e));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
io.complete(Ok(OnDiskValue::WalRecordOrImage(
|
||||
blob_read.into_bytes(),
|
||||
)));
|
||||
}
|
||||
|
||||
assert!(ios.is_empty());
|
||||
}
|
||||
Err(err) => {
|
||||
for (_, sender) in ios {
|
||||
sender.complete(Err(std::io::Error::new(
|
||||
err.kind(),
|
||||
"vec read failed",
|
||||
)));
|
||||
}
|
||||
}
|
||||
let blobs_buf = match res {
|
||||
Ok(blobs_buf) => blobs_buf,
|
||||
Err(err) => {
|
||||
let kind = err.kind();
|
||||
for (_, blob_meta) in read.blobs_at.as_slice() {
|
||||
reconstruct_state.on_key_error(
|
||||
blob_meta.key,
|
||||
PageReconstructError::Other(anyhow!(
|
||||
"Failed to read blobs from virtual file {}: {}",
|
||||
self.file.path(),
|
||||
kind
|
||||
)),
|
||||
);
|
||||
}
|
||||
|
||||
// keep layer resident until this IO is done; this spawned IO future generally outlives the
|
||||
// call to `self` / the `Arc<DownloadedLayer>` / the `ResidentLayer` that guarantees residency
|
||||
drop(read_extend_residency);
|
||||
})
|
||||
.await;
|
||||
// We have "lost" the buffer since the lower level IO api
|
||||
// doesn't return the buffer on error. Allocate a new one.
|
||||
buf = Some(IoBufferMut::with_capacity(buf_size));
|
||||
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let view = BufView::new_slice(&blobs_buf.buf);
|
||||
for meta in blobs_buf.blobs.iter().rev() {
|
||||
if Some(meta.meta.key) == ignore_key_with_err {
|
||||
continue;
|
||||
}
|
||||
let blob_read = meta.read(&view).await;
|
||||
let blob_read = match blob_read {
|
||||
Ok(buf) => buf,
|
||||
Err(e) => {
|
||||
reconstruct_state.on_key_error(
|
||||
meta.meta.key,
|
||||
PageReconstructError::Other(anyhow!(e).context(format!(
|
||||
"Failed to decompress blob from virtual file {}",
|
||||
self.file.path(),
|
||||
))),
|
||||
);
|
||||
|
||||
ignore_key_with_err = Some(meta.meta.key);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let value = Value::des(&blob_read);
|
||||
|
||||
let value = match value {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
reconstruct_state.on_key_error(
|
||||
meta.meta.key,
|
||||
PageReconstructError::Other(anyhow!(e).context(format!(
|
||||
"Failed to deserialize blob from virtual file {}",
|
||||
self.file.path(),
|
||||
))),
|
||||
);
|
||||
|
||||
ignore_key_with_err = Some(meta.meta.key);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
// Invariant: once a key reaches [`ValueReconstructSituation::Complete`]
|
||||
// state, no further updates shall be made to it. The call below will
|
||||
// panic if the invariant is violated.
|
||||
reconstruct_state.update_key(&meta.meta.key, meta.meta.lsn, value);
|
||||
}
|
||||
|
||||
buf = Some(blobs_buf.buf);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1202,14 +1224,7 @@ impl DeltaLayerInner {
|
||||
let actionable = if let Some((key, lsn, start_offset)) = prev.take() {
|
||||
let end_offset = offset;
|
||||
|
||||
Some((
|
||||
BlobMeta {
|
||||
key,
|
||||
lsn,
|
||||
will_init: false,
|
||||
},
|
||||
start_offset..end_offset,
|
||||
))
|
||||
Some((BlobMeta { key, lsn }, start_offset..end_offset))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
@@ -1545,9 +1560,7 @@ impl DeltaLayerIterator<'_> {
|
||||
let lsn = DeltaKey::extract_lsn_from_buf(&raw_key);
|
||||
let blob_ref = BlobRef(value);
|
||||
let offset = blob_ref.pos();
|
||||
if let Some(batch_plan) =
|
||||
self.planner.handle(key, lsn, offset, blob_ref.will_init())
|
||||
{
|
||||
if let Some(batch_plan) = self.planner.handle(key, lsn, offset) {
|
||||
break batch_plan;
|
||||
}
|
||||
} else {
|
||||
@@ -1605,11 +1618,11 @@ pub(crate) mod test {
|
||||
use crate::tenant::harness::TIMELINE_ID;
|
||||
use crate::tenant::storage_layer::{Layer, ResidentLayer};
|
||||
use crate::tenant::vectored_blob_io::StreamingVectoredReadPlanner;
|
||||
use crate::tenant::{Tenant, Timeline};
|
||||
use crate::tenant::{TenantShard, Timeline};
|
||||
use crate::{
|
||||
context::DownloadBehavior,
|
||||
task_mgr::TaskKind,
|
||||
tenant::{disk_btree::tests::TestDisk, harness::TenantHarness},
|
||||
tenant::{disk_btree::tests::TestDisk, harness::TenantShardHarness},
|
||||
DEFAULT_PG_VERSION,
|
||||
};
|
||||
use bytes::Bytes;
|
||||
@@ -1660,6 +1673,7 @@ pub(crate) mod test {
|
||||
.expect("In memory disk finish should never fail");
|
||||
let reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(0, root_offset, disk);
|
||||
let planner = VectoredReadPlanner::new(100);
|
||||
let mut reconstruct_state = ValuesReconstructState::new();
|
||||
let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);
|
||||
|
||||
let keyspace = KeySpace {
|
||||
@@ -1677,6 +1691,7 @@ pub(crate) mod test {
|
||||
disk_offset,
|
||||
reader,
|
||||
planner,
|
||||
&mut reconstruct_state,
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
@@ -1864,7 +1879,8 @@ pub(crate) mod test {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_delta_layer_vectored_read_end_to_end() -> anyhow::Result<()> {
|
||||
let harness = TenantHarness::create("test_delta_layer_oversized_vectored_read").await?;
|
||||
let harness =
|
||||
TenantShardHarness::create("test_delta_layer_oversized_vectored_read").await?;
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
|
||||
let timeline_id = TimelineId::generate();
|
||||
@@ -1920,6 +1936,7 @@ pub(crate) mod test {
|
||||
);
|
||||
|
||||
let planner = VectoredReadPlanner::new(constants::MAX_VECTORED_READ_BYTES);
|
||||
let mut reconstruct_state = ValuesReconstructState::new();
|
||||
let keyspace = pick_random_keyspace(rng, &entries_meta.key_range);
|
||||
let data_end_offset = inner.index_start_blk as u64 * PAGE_SZ as u64;
|
||||
|
||||
@@ -1929,6 +1946,7 @@ pub(crate) mod test {
|
||||
data_end_offset,
|
||||
index_reader,
|
||||
planner,
|
||||
&mut reconstruct_state,
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
@@ -1965,7 +1983,7 @@ pub(crate) mod test {
|
||||
use bytes::Bytes;
|
||||
use pageserver_api::record::NeonWalRecord;
|
||||
|
||||
let h = crate::tenant::harness::TenantHarness::create("truncate_delta_smoke")
|
||||
let h = crate::tenant::harness::TenantShardHarness::create("truncate_delta_smoke")
|
||||
.await
|
||||
.unwrap();
|
||||
let (tenant, ctx) = h.load().await;
|
||||
@@ -2197,7 +2215,7 @@ pub(crate) mod test {
|
||||
}
|
||||
|
||||
pub(crate) async fn produce_delta_layer(
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
tline: &Arc<Timeline>,
|
||||
mut deltas: Vec<(Key, Lsn, Value)>,
|
||||
ctx: &RequestContext,
|
||||
@@ -2251,7 +2269,9 @@ pub(crate) mod test {
|
||||
|
||||
#[tokio::test]
|
||||
async fn delta_layer_iterator() {
|
||||
let harness = TenantHarness::create("delta_layer_iterator").await.unwrap();
|
||||
let harness = TenantShardHarness::create("delta_layer_iterator")
|
||||
.await
|
||||
.unwrap();
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
|
||||
let tline = tenant
|
||||
|
||||
@@ -106,7 +106,7 @@ mod tests {
|
||||
|
||||
use crate::{
|
||||
tenant::{
|
||||
harness::{TenantHarness, TIMELINE_ID},
|
||||
harness::{TenantShardHarness, TIMELINE_ID},
|
||||
storage_layer::delta_layer::test::produce_delta_layer,
|
||||
},
|
||||
DEFAULT_PG_VERSION,
|
||||
@@ -137,7 +137,7 @@ mod tests {
|
||||
use bytes::Bytes;
|
||||
use pageserver_api::value::Value;
|
||||
|
||||
let harness = TenantHarness::create("filter_iterator_filter_keyspace_iterator")
|
||||
let harness = TenantShardHarness::create("filter_iterator_filter_keyspace_iterator")
|
||||
.await
|
||||
.unwrap();
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
|
||||
@@ -38,11 +38,12 @@ use crate::tenant::vectored_blob_io::{
|
||||
BlobFlag, BufView, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead,
|
||||
VectoredReadPlanner,
|
||||
};
|
||||
use crate::tenant::PageReconstructError;
|
||||
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
|
||||
use crate::virtual_file::IoBufferMut;
|
||||
use crate::virtual_file::{self, MaybeFatalIo, VirtualFile};
|
||||
use crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX};
|
||||
use anyhow::{bail, ensure, Context, Result};
|
||||
use anyhow::{anyhow, bail, ensure, Context, Result};
|
||||
use bytes::Bytes;
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use hex;
|
||||
@@ -55,13 +56,12 @@ use pageserver_api::shard::{ShardIdentity, TenantShardId};
|
||||
use pageserver_api::value::Value;
|
||||
use rand::{distributions::Alphanumeric, Rng};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use std::collections::VecDeque;
|
||||
use std::fs::File;
|
||||
use std::io::SeekFrom;
|
||||
use std::ops::Range;
|
||||
use std::os::unix::prelude::FileExt;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::OnceCell;
|
||||
use tokio_stream::StreamExt;
|
||||
use tracing::*;
|
||||
@@ -73,10 +73,7 @@ use utils::{
|
||||
};
|
||||
|
||||
use super::layer_name::ImageLayerName;
|
||||
use super::{
|
||||
AsLayerDesc, LayerName, OnDiskValue, OnDiskValueIo, PersistentLayerDesc, ResidentLayer,
|
||||
ValuesReconstructState,
|
||||
};
|
||||
use super::{AsLayerDesc, LayerName, PersistentLayerDesc, ValuesReconstructState};
|
||||
|
||||
///
|
||||
/// Header stored in the beginning of the file
|
||||
@@ -167,7 +164,7 @@ pub struct ImageLayerInner {
|
||||
key_range: Range<Key>,
|
||||
lsn: Lsn,
|
||||
|
||||
file: Arc<VirtualFile>,
|
||||
file: VirtualFile,
|
||||
file_id: FileId,
|
||||
|
||||
max_vectored_read_bytes: Option<MaxVectoredReadBytes>,
|
||||
@@ -394,11 +391,9 @@ impl ImageLayerInner {
|
||||
max_vectored_read_bytes: Option<MaxVectoredReadBytes>,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Self> {
|
||||
let file = Arc::new(
|
||||
VirtualFile::open_v2(path, ctx)
|
||||
.await
|
||||
.context("open layer file")?,
|
||||
);
|
||||
let file = VirtualFile::open_v2(path, ctx)
|
||||
.await
|
||||
.context("open layer file")?;
|
||||
let file_id = page_cache::next_file_id();
|
||||
let block_reader = FileBlockReader::new(&file, file_id);
|
||||
let summary_blk = block_reader
|
||||
@@ -444,7 +439,6 @@ impl ImageLayerInner {
|
||||
// the reconstruct state with whatever is found.
|
||||
pub(super) async fn get_values_reconstruct_data(
|
||||
&self,
|
||||
this: ResidentLayer,
|
||||
keyspace: KeySpace,
|
||||
reconstruct_state: &mut ValuesReconstructState,
|
||||
ctx: &RequestContext,
|
||||
@@ -454,7 +448,7 @@ impl ImageLayerInner {
|
||||
.await
|
||||
.map_err(GetVectoredError::Other)?;
|
||||
|
||||
self.do_reads_and_update_state(this, reads, reconstruct_state, ctx)
|
||||
self.do_reads_and_update_state(reads, reconstruct_state, ctx)
|
||||
.await;
|
||||
|
||||
reconstruct_state.on_image_layer_visited(&self.key_range);
|
||||
@@ -576,7 +570,6 @@ impl ImageLayerInner {
|
||||
|
||||
async fn do_reads_and_update_state(
|
||||
&self,
|
||||
this: ResidentLayer,
|
||||
reads: Vec<VectoredRead>,
|
||||
reconstruct_state: &mut ValuesReconstructState,
|
||||
ctx: &RequestContext,
|
||||
@@ -587,13 +580,8 @@ impl ImageLayerInner {
|
||||
.0
|
||||
.into();
|
||||
|
||||
let vectored_blob_reader = VectoredBlobReader::new(&self.file);
|
||||
for read in reads.into_iter() {
|
||||
let mut ios: HashMap<(Key, Lsn), OnDiskValueIo> = Default::default();
|
||||
for (_, blob_meta) in read.blobs_at.as_slice() {
|
||||
let io = reconstruct_state.update_key(&blob_meta.key, blob_meta.lsn, true);
|
||||
ios.insert((blob_meta.key, blob_meta.lsn), io);
|
||||
}
|
||||
|
||||
let buf_size = read.size();
|
||||
|
||||
if buf_size > max_vectored_read_bytes {
|
||||
@@ -623,51 +611,50 @@ impl ImageLayerInner {
|
||||
}
|
||||
}
|
||||
|
||||
let read_extend_residency = this.clone();
|
||||
let read_from = self.file.clone();
|
||||
let read_ctx = ctx.attached_child();
|
||||
reconstruct_state
|
||||
.spawn_io(async move {
|
||||
let buf = IoBufferMut::with_capacity(buf_size);
|
||||
let vectored_blob_reader = VectoredBlobReader::new(&read_from);
|
||||
let res = vectored_blob_reader.read_blobs(&read, buf, &read_ctx).await;
|
||||
let buf = IoBufferMut::with_capacity(buf_size);
|
||||
let res = vectored_blob_reader.read_blobs(&read, buf, ctx).await;
|
||||
|
||||
match res {
|
||||
Ok(blobs_buf) => {
|
||||
let view = BufView::new_slice(&blobs_buf.buf);
|
||||
for meta in blobs_buf.blobs.iter() {
|
||||
let io: OnDiskValueIo =
|
||||
ios.remove(&(meta.meta.key, meta.meta.lsn)).unwrap();
|
||||
let img_buf = meta.read(&view).await;
|
||||
match res {
|
||||
Ok(blobs_buf) => {
|
||||
let view = BufView::new_slice(&blobs_buf.buf);
|
||||
for meta in blobs_buf.blobs.iter() {
|
||||
let img_buf = meta.read(&view).await;
|
||||
|
||||
let img_buf = match img_buf {
|
||||
Ok(img_buf) => img_buf,
|
||||
Err(e) => {
|
||||
io.complete(Err(e));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let img_buf = match img_buf {
|
||||
Ok(img_buf) => img_buf,
|
||||
Err(e) => {
|
||||
reconstruct_state.on_key_error(
|
||||
meta.meta.key,
|
||||
PageReconstructError::Other(anyhow!(e).context(format!(
|
||||
"Failed to decompress blob from virtual file {}",
|
||||
self.file.path(),
|
||||
))),
|
||||
);
|
||||
|
||||
io.complete(Ok(OnDiskValue::RawImage(img_buf.into_bytes())));
|
||||
continue;
|
||||
}
|
||||
|
||||
assert!(ios.is_empty());
|
||||
}
|
||||
Err(err) => {
|
||||
for (_, io) in ios {
|
||||
io.complete(Err(std::io::Error::new(
|
||||
err.kind(),
|
||||
"vec read failed",
|
||||
)));
|
||||
}
|
||||
}
|
||||
};
|
||||
reconstruct_state.update_key(
|
||||
&meta.meta.key,
|
||||
self.lsn,
|
||||
Value::Image(img_buf.into_bytes()),
|
||||
);
|
||||
}
|
||||
|
||||
// keep layer resident until this IO is done; this spawned IO future generally outlives the
|
||||
// call to `self` / the `Arc<DownloadedLayer>` / the `ResidentLayer` that guarantees residency
|
||||
drop(read_extend_residency);
|
||||
})
|
||||
.await;
|
||||
}
|
||||
Err(err) => {
|
||||
let kind = err.kind();
|
||||
for (_, blob_meta) in read.blobs_at.as_slice() {
|
||||
reconstruct_state.on_key_error(
|
||||
blob_meta.key,
|
||||
PageReconstructError::from(anyhow!(
|
||||
"Failed to read blobs from virtual file {}: {}",
|
||||
self.file.path(),
|
||||
kind
|
||||
)),
|
||||
);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1082,7 +1069,6 @@ impl ImageLayerIterator<'_> {
|
||||
Key::from_slice(&raw_key[..KEY_SIZE]),
|
||||
self.image_layer.lsn,
|
||||
offset,
|
||||
true,
|
||||
) {
|
||||
break batch_plan;
|
||||
}
|
||||
@@ -1152,10 +1138,10 @@ mod test {
|
||||
context::RequestContext,
|
||||
tenant::{
|
||||
config::TenantConf,
|
||||
harness::{TenantHarness, TIMELINE_ID},
|
||||
harness::{TenantShardHarness, TIMELINE_ID},
|
||||
storage_layer::{Layer, ResidentLayer},
|
||||
vectored_blob_io::StreamingVectoredReadPlanner,
|
||||
Tenant, Timeline,
|
||||
TenantShard, Timeline,
|
||||
},
|
||||
DEFAULT_PG_VERSION,
|
||||
};
|
||||
@@ -1184,7 +1170,7 @@ mod test {
|
||||
// Create an unsharded parent with a layer.
|
||||
//
|
||||
|
||||
let harness = TenantHarness::create_custom(
|
||||
let harness = TenantShardHarness::create_custom(
|
||||
"test_image_layer_rewrite--parent",
|
||||
tenant_conf.clone(),
|
||||
tenant_id,
|
||||
@@ -1247,7 +1233,7 @@ mod test {
|
||||
ShardStripeSize(0x8000),
|
||||
)
|
||||
.unwrap();
|
||||
let harness = TenantHarness::create_custom(
|
||||
let harness = TenantShardHarness::create_custom(
|
||||
Box::leak(Box::new(format!(
|
||||
"test_image_layer_rewrite--child{}",
|
||||
shard_identity.shard_slug()
|
||||
@@ -1338,7 +1324,7 @@ mod test {
|
||||
}
|
||||
|
||||
async fn produce_image_layer(
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
tline: &Arc<Timeline>,
|
||||
mut images: Vec<(Key, Bytes)>,
|
||||
lsn: Lsn,
|
||||
@@ -1394,7 +1380,9 @@ mod test {
|
||||
|
||||
#[tokio::test]
|
||||
async fn image_layer_iterator() {
|
||||
let harness = TenantHarness::create("image_layer_iterator").await.unwrap();
|
||||
let harness = TenantShardHarness::create("image_layer_iterator")
|
||||
.await
|
||||
.unwrap();
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
|
||||
let tline = tenant
|
||||
|
||||
@@ -8,22 +8,23 @@ use crate::assert_u64_eq_usize::{u64_to_usize, U64IsUsize, UsizeIsU64};
|
||||
use crate::config::PageServerConf;
|
||||
use crate::context::{PageContentKind, RequestContext, RequestContextBuilder};
|
||||
use crate::tenant::ephemeral_file::EphemeralFile;
|
||||
use crate::tenant::storage_layer::{OnDiskValue, OnDiskValueIo};
|
||||
use crate::tenant::timeline::GetVectoredError;
|
||||
use crate::tenant::PageReconstructError;
|
||||
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
|
||||
use crate::{l0_flush, page_cache};
|
||||
use anyhow::Result;
|
||||
use anyhow::{anyhow, Result};
|
||||
use camino::Utf8PathBuf;
|
||||
use pageserver_api::key::CompactKey;
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::keyspace::KeySpace;
|
||||
use pageserver_api::models::InMemoryLayerInfo;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_api::value::Value;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::sync::{Arc, OnceLock};
|
||||
use std::time::Instant;
|
||||
use tracing::*;
|
||||
use utils::{id::TimelineId, lsn::Lsn, vec_map::VecMap};
|
||||
use utils::{bin_ser::BeSer, id::TimelineId, lsn::Lsn, vec_map::VecMap};
|
||||
use wal_decoder::serialized_batch::{SerializedValueBatch, SerializedValueMeta, ValueMeta};
|
||||
// avoid binding to Write (conflicts with std::io::Write)
|
||||
// while being able to use std::fmt::Write's methods
|
||||
@@ -35,7 +36,9 @@ use std::sync::atomic::Ordering as AtomicOrdering;
|
||||
use std::sync::atomic::{AtomicU64, AtomicUsize};
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use super::{DeltaLayerWriter, PersistentLayerDesc, ValuesReconstructState};
|
||||
use super::{
|
||||
DeltaLayerWriter, PersistentLayerDesc, ValueReconstructSituation, ValuesReconstructState,
|
||||
};
|
||||
|
||||
pub(crate) mod vectored_dio_read;
|
||||
|
||||
@@ -412,8 +415,10 @@ impl InMemoryLayer {
|
||||
|
||||
// Look up the keys in the provided keyspace and update
|
||||
// the reconstruct state with whatever is found.
|
||||
//
|
||||
// If the key is cached, go no further than the cached Lsn.
|
||||
pub(crate) async fn get_values_reconstruct_data(
|
||||
self: &Arc<InMemoryLayer>,
|
||||
&self,
|
||||
keyspace: KeySpace,
|
||||
end_lsn: Lsn,
|
||||
reconstruct_state: &mut ValuesReconstructState,
|
||||
@@ -430,9 +435,6 @@ impl InMemoryLayer {
|
||||
read: vectored_dio_read::LogicalRead<Vec<u8>>,
|
||||
}
|
||||
let mut reads: HashMap<Key, Vec<ValueRead>> = HashMap::new();
|
||||
let mut ios: HashMap<(Key, Lsn), OnDiskValueIo> = Default::default();
|
||||
|
||||
let lsn_range = self.start_lsn..end_lsn;
|
||||
|
||||
for range in keyspace.ranges.iter() {
|
||||
for (key, vec_map) in inner
|
||||
@@ -440,7 +442,12 @@ impl InMemoryLayer {
|
||||
.range(range.start.to_compact()..range.end.to_compact())
|
||||
{
|
||||
let key = Key::from_compact(*key);
|
||||
let slice = vec_map.slice_range(lsn_range.clone());
|
||||
let lsn_range = match reconstruct_state.get_cached_lsn(&key) {
|
||||
Some(cached_lsn) => (cached_lsn + 1)..end_lsn,
|
||||
None => self.start_lsn..end_lsn,
|
||||
};
|
||||
|
||||
let slice = vec_map.slice_range(lsn_range);
|
||||
|
||||
for (entry_lsn, index_entry) in slice.iter().rev() {
|
||||
let IndexEntryUnpacked {
|
||||
@@ -456,59 +463,55 @@ impl InMemoryLayer {
|
||||
Vec::with_capacity(len as usize),
|
||||
),
|
||||
});
|
||||
|
||||
let io = reconstruct_state.update_key(&key, *entry_lsn, will_init);
|
||||
ios.insert((key, *entry_lsn), io);
|
||||
|
||||
if will_init {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
drop(inner); // release the lock before we spawn the IO; if it's serial-mode IO we will deadlock on the read().await below
|
||||
let read_from = Arc::clone(self);
|
||||
let read_ctx = ctx.attached_child();
|
||||
reconstruct_state
|
||||
.spawn_io(async move {
|
||||
let inner = read_from.inner.read().await;
|
||||
let f = vectored_dio_read::execute(
|
||||
&inner.file,
|
||||
reads
|
||||
.iter()
|
||||
.flat_map(|(_, value_reads)| value_reads.iter().map(|v| &v.read)),
|
||||
&read_ctx,
|
||||
);
|
||||
send_future::SendFuture::send(f) // https://github.com/rust-lang/rust/issues/96865
|
||||
.await;
|
||||
|
||||
for (key, value_reads) in reads {
|
||||
for ValueRead { entry_lsn, read } in value_reads {
|
||||
let io = ios.remove(&(key, entry_lsn)).expect("sender must exist");
|
||||
match read.into_result().expect("we run execute() above") {
|
||||
Err(e) => {
|
||||
io.complete(Err(std::io::Error::new(
|
||||
e.kind(),
|
||||
"dio vec read failed",
|
||||
)));
|
||||
}
|
||||
Ok(value_buf) => {
|
||||
io.complete(Ok(OnDiskValue::WalRecordOrImage(value_buf.into())));
|
||||
}
|
||||
// Execute the reads.
|
||||
|
||||
let f = vectored_dio_read::execute(
|
||||
&inner.file,
|
||||
reads
|
||||
.iter()
|
||||
.flat_map(|(_, value_reads)| value_reads.iter().map(|v| &v.read)),
|
||||
&ctx,
|
||||
);
|
||||
send_future::SendFuture::send(f) // https://github.com/rust-lang/rust/issues/96865
|
||||
.await;
|
||||
|
||||
// Process results into the reconstruct state
|
||||
'next_key: for (key, value_reads) in reads {
|
||||
for ValueRead { entry_lsn, read } in value_reads {
|
||||
match read.into_result().expect("we run execute() above") {
|
||||
Err(e) => {
|
||||
reconstruct_state.on_key_error(key, PageReconstructError::from(anyhow!(e)));
|
||||
continue 'next_key;
|
||||
}
|
||||
Ok(value_buf) => {
|
||||
let value = Value::des(&value_buf);
|
||||
if let Err(e) = value {
|
||||
reconstruct_state
|
||||
.on_key_error(key, PageReconstructError::from(anyhow!(e)));
|
||||
continue 'next_key;
|
||||
}
|
||||
|
||||
let key_situation =
|
||||
reconstruct_state.update_key(&key, entry_lsn, value.unwrap());
|
||||
if key_situation == ValueReconstructSituation::Complete {
|
||||
// TODO: metric to see if we fetched more values than necessary
|
||||
continue 'next_key;
|
||||
}
|
||||
|
||||
// process the next value in the next iteration of the loop
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert!(ios.is_empty());
|
||||
|
||||
// Keep layer existent until this IO is done;
|
||||
// This is kinda forced for InMemoryLayer because we need to inner.read() anyway,
|
||||
// but it's less obvious for DeltaLayer and ImageLayer. So, keep this explicit
|
||||
// drop for consistency among all three layer types.
|
||||
drop(inner);
|
||||
drop(read_from);
|
||||
})
|
||||
.await;
|
||||
reconstruct_state.on_lsn_advanced(&keyspace, self.start_lsn);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -603,7 +606,6 @@ impl InMemoryLayer {
|
||||
// Write the batch to the file
|
||||
inner.file.write_raw(&raw, ctx).await?;
|
||||
let new_size = inner.file.len();
|
||||
|
||||
let expected_new_len = base_offset
|
||||
.checked_add(raw.len().into_u64())
|
||||
// write_raw would error if we were to overflow u64.
|
||||
|
||||
@@ -308,7 +308,7 @@ impl Layer {
|
||||
reconstruct_data: &mut ValuesReconstructState,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), GetVectoredError> {
|
||||
let downloaded = self
|
||||
let layer = self
|
||||
.0
|
||||
.get_or_maybe_download(true, Some(ctx))
|
||||
.await
|
||||
@@ -318,15 +318,11 @@ impl Layer {
|
||||
}
|
||||
other => GetVectoredError::Other(anyhow::anyhow!(other)),
|
||||
})?;
|
||||
let this = ResidentLayer {
|
||||
downloaded: downloaded.clone(),
|
||||
owner: self.clone(),
|
||||
};
|
||||
|
||||
self.record_access(ctx);
|
||||
|
||||
downloaded
|
||||
.get_values_reconstruct_data(this, keyspace, lsn_range, reconstruct_data, ctx)
|
||||
layer
|
||||
.get_values_reconstruct_data(keyspace, lsn_range, reconstruct_data, &self.0, ctx)
|
||||
.instrument(tracing::debug_span!("get_values_reconstruct_data", layer=%self))
|
||||
.await
|
||||
.map_err(|err| match err {
|
||||
@@ -1772,25 +1768,25 @@ impl DownloadedLayer {
|
||||
|
||||
async fn get_values_reconstruct_data(
|
||||
&self,
|
||||
this: ResidentLayer,
|
||||
keyspace: KeySpace,
|
||||
lsn_range: Range<Lsn>,
|
||||
reconstruct_data: &mut ValuesReconstructState,
|
||||
owner: &Arc<LayerInner>,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), GetVectoredError> {
|
||||
use LayerKind::*;
|
||||
|
||||
match self
|
||||
.get(&this.owner.0, ctx)
|
||||
.get(owner, ctx)
|
||||
.await
|
||||
.map_err(GetVectoredError::Other)?
|
||||
{
|
||||
Delta(d) => {
|
||||
d.get_values_reconstruct_data(this, keyspace, lsn_range, reconstruct_data, ctx)
|
||||
d.get_values_reconstruct_data(keyspace, lsn_range, reconstruct_data, ctx)
|
||||
.await
|
||||
}
|
||||
Image(i) => {
|
||||
i.get_values_reconstruct_data(this, keyspace, reconstruct_data, ctx)
|
||||
i.get_values_reconstruct_data(keyspace, reconstruct_data, ctx)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::time::UNIX_EPOCH;
|
||||
|
||||
use pageserver_api::key::{Key, CONTROLFILE_KEY};
|
||||
use pageserver_api::key::CONTROLFILE_KEY;
|
||||
use tokio::task::JoinSet;
|
||||
use utils::{
|
||||
completion::{self, Completion},
|
||||
@@ -9,14 +9,8 @@ use utils::{
|
||||
|
||||
use super::failpoints::{Failpoint, FailpointKind};
|
||||
use super::*;
|
||||
use crate::{
|
||||
context::DownloadBehavior,
|
||||
tenant::{
|
||||
harness::test_img,
|
||||
storage_layer::{IoConcurrency, LayerVisibilityHint},
|
||||
},
|
||||
};
|
||||
use crate::{task_mgr::TaskKind, tenant::harness::TenantHarness};
|
||||
use crate::{context::DownloadBehavior, tenant::storage_layer::LayerVisibilityHint};
|
||||
use crate::{task_mgr::TaskKind, tenant::harness::TenantShardHarness};
|
||||
|
||||
/// Used in tests to advance a future to wanted await point, and not futher.
|
||||
const ADVANCE: std::time::Duration = std::time::Duration::from_secs(3600);
|
||||
@@ -30,59 +24,27 @@ const FOREVER: std::time::Duration = std::time::Duration::from_secs(ADVANCE.as_s
|
||||
async fn smoke_test() {
|
||||
let handle = tokio::runtime::Handle::current();
|
||||
|
||||
let h = TenantHarness::create("smoke_test").await.unwrap();
|
||||
let h = TenantShardHarness::create("smoke_test").await.unwrap();
|
||||
let span = h.span();
|
||||
let download_span = span.in_scope(|| tracing::info_span!("downloading", timeline_id = 1));
|
||||
let (tenant, _) = h.load().await;
|
||||
let io_concurrency = IoConcurrency::spawn_for_test();
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Download);
|
||||
|
||||
let image_layers = vec![(
|
||||
Lsn(0x40),
|
||||
vec![(
|
||||
Key::from_hex("620000000033333333444444445500000000").unwrap(),
|
||||
test_img("foo"),
|
||||
)],
|
||||
)];
|
||||
|
||||
// Create a test timeline with one real layer, and one synthetic test layer. The synthetic
|
||||
// one is only there so that we can GC the real one without leaving the timeline's metadata
|
||||
// empty, which is an illegal state (see [`IndexPart::validate`]).
|
||||
let timeline = tenant
|
||||
.create_test_timeline_with_layers(
|
||||
TimelineId::generate(),
|
||||
Lsn(0x10),
|
||||
14,
|
||||
&ctx,
|
||||
Default::default(),
|
||||
image_layers,
|
||||
Lsn(0x100),
|
||||
)
|
||||
.create_test_timeline(TimelineId::generate(), Lsn(0x10), 14, &ctx)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Grab one of the timeline's layers to exercise in the test, and the other layer that is just
|
||||
// there to avoid the timeline being illegally empty
|
||||
let (layer, dummy_layer) = {
|
||||
let layer = {
|
||||
let mut layers = {
|
||||
let layers = timeline.layers.read().await;
|
||||
layers.likely_resident_layers().cloned().collect::<Vec<_>>()
|
||||
};
|
||||
|
||||
assert_eq!(layers.len(), 2);
|
||||
assert_eq!(layers.len(), 1);
|
||||
|
||||
layers.sort_by_key(|l| l.layer_desc().get_key_range().start);
|
||||
let synthetic_layer = layers.pop().unwrap();
|
||||
let real_layer = layers.pop().unwrap();
|
||||
tracing::info!(
|
||||
"real_layer={:?} ({}), synthetic_layer={:?} ({})",
|
||||
real_layer,
|
||||
real_layer.layer_desc().file_size,
|
||||
synthetic_layer,
|
||||
synthetic_layer.layer_desc().file_size
|
||||
);
|
||||
(real_layer, synthetic_layer)
|
||||
layers.swap_remove(0)
|
||||
};
|
||||
|
||||
// all layers created at pageserver are like `layer`, initialized with strong
|
||||
@@ -93,7 +55,7 @@ async fn smoke_test() {
|
||||
};
|
||||
|
||||
let img_before = {
|
||||
let mut data = ValuesReconstructState::new(io_concurrency.clone());
|
||||
let mut data = ValuesReconstructState::default();
|
||||
layer
|
||||
.get_values_reconstruct_data(
|
||||
controlfile_keyspace.clone(),
|
||||
@@ -103,13 +65,10 @@ async fn smoke_test() {
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
data.keys
|
||||
.remove(&CONTROLFILE_KEY)
|
||||
.expect("must be present")
|
||||
.collect_pending_ios()
|
||||
.await
|
||||
.expect("must not error")
|
||||
.expect("should not error")
|
||||
.img
|
||||
.take()
|
||||
.expect("tenant harness writes the control file")
|
||||
@@ -128,7 +87,7 @@ async fn smoke_test() {
|
||||
|
||||
// on accesses when the layer is evicted, it will automatically be downloaded.
|
||||
let img_after = {
|
||||
let mut data = ValuesReconstructState::new(io_concurrency.clone());
|
||||
let mut data = ValuesReconstructState::default();
|
||||
layer
|
||||
.get_values_reconstruct_data(
|
||||
controlfile_keyspace.clone(),
|
||||
@@ -142,9 +101,7 @@ async fn smoke_test() {
|
||||
data.keys
|
||||
.remove(&CONTROLFILE_KEY)
|
||||
.expect("must be present")
|
||||
.collect_pending_ios()
|
||||
.await
|
||||
.expect("must not error")
|
||||
.expect("should not error")
|
||||
.img
|
||||
.take()
|
||||
.expect("tenant harness writes the control file")
|
||||
@@ -216,13 +173,10 @@ async fn smoke_test() {
|
||||
|
||||
let rtc = &timeline.remote_client;
|
||||
|
||||
// Simulate GC removing our test layer.
|
||||
{
|
||||
let mut g = timeline.layers.write().await;
|
||||
|
||||
let layers = &[layer];
|
||||
let mut g = timeline.layers.write().await;
|
||||
g.open_mut().unwrap().finish_gc_timeline(layers);
|
||||
|
||||
// this just updates the remote_physical_size for demonstration purposes
|
||||
rtc.schedule_gc_update(layers).unwrap();
|
||||
}
|
||||
@@ -237,10 +191,7 @@ async fn smoke_test() {
|
||||
|
||||
rtc.wait_completion().await.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
rtc.get_remote_physical_size(),
|
||||
dummy_layer.metadata().file_size
|
||||
);
|
||||
assert_eq!(rtc.get_remote_physical_size(), 0);
|
||||
assert_eq!(0, LAYER_IMPL_METRICS.inits_cancelled.get())
|
||||
}
|
||||
|
||||
@@ -251,7 +202,7 @@ async fn evict_and_wait_on_wanted_deleted() {
|
||||
// this is the runtime on which Layer spawns the blocking tasks on
|
||||
let handle = tokio::runtime::Handle::current();
|
||||
|
||||
let h = TenantHarness::create("evict_and_wait_on_wanted_deleted")
|
||||
let h = TenantShardHarness::create("evict_and_wait_on_wanted_deleted")
|
||||
.await
|
||||
.unwrap();
|
||||
utils::logging::replace_panic_hook_with_tracing_panic_hook().forget();
|
||||
@@ -335,7 +286,7 @@ fn read_wins_pending_eviction() {
|
||||
rt.block_on(async move {
|
||||
// this is the runtime on which Layer spawns the blocking tasks on
|
||||
let handle = tokio::runtime::Handle::current();
|
||||
let h = TenantHarness::create("read_wins_pending_eviction")
|
||||
let h = TenantShardHarness::create("read_wins_pending_eviction")
|
||||
.await
|
||||
.unwrap();
|
||||
let (tenant, ctx) = h.load().await;
|
||||
@@ -469,7 +420,7 @@ fn multiple_pending_evictions_scenario(name: &'static str, in_order: bool) {
|
||||
rt.block_on(async move {
|
||||
// this is the runtime on which Layer spawns the blocking tasks on
|
||||
let handle = tokio::runtime::Handle::current();
|
||||
let h = TenantHarness::create(name).await.unwrap();
|
||||
let h = TenantShardHarness::create(name).await.unwrap();
|
||||
let (tenant, ctx) = h.load().await;
|
||||
let span = h.span();
|
||||
let download_span = span.in_scope(|| tracing::info_span!("downloading", timeline_id = 1));
|
||||
@@ -638,7 +589,7 @@ fn multiple_pending_evictions_scenario(name: &'static str, in_order: bool) {
|
||||
#[tokio::test(start_paused = true)]
|
||||
async fn cancelled_get_or_maybe_download_does_not_cancel_eviction() {
|
||||
let handle = tokio::runtime::Handle::current();
|
||||
let h = TenantHarness::create("cancelled_get_or_maybe_download_does_not_cancel_eviction")
|
||||
let h = TenantShardHarness::create("cancelled_get_or_maybe_download_does_not_cancel_eviction")
|
||||
.await
|
||||
.unwrap();
|
||||
let (tenant, ctx) = h.load().await;
|
||||
@@ -716,7 +667,7 @@ async fn cancelled_get_or_maybe_download_does_not_cancel_eviction() {
|
||||
#[tokio::test(start_paused = true)]
|
||||
async fn evict_and_wait_does_not_wait_for_download() {
|
||||
// let handle = tokio::runtime::Handle::current();
|
||||
let h = TenantHarness::create("evict_and_wait_does_not_wait_for_download")
|
||||
let h = TenantShardHarness::create("evict_and_wait_does_not_wait_for_download")
|
||||
.await
|
||||
.unwrap();
|
||||
let (tenant, ctx) = h.load().await;
|
||||
@@ -815,7 +766,7 @@ async fn eviction_cancellation_on_drop() {
|
||||
// this is the runtime on which Layer spawns the blocking tasks on
|
||||
let handle = tokio::runtime::Handle::current();
|
||||
|
||||
let h = TenantHarness::create("eviction_cancellation_on_drop")
|
||||
let h = TenantShardHarness::create("eviction_cancellation_on_drop")
|
||||
.await
|
||||
.unwrap();
|
||||
utils::logging::replace_panic_hook_with_tracing_panic_hook().forget();
|
||||
|
||||
@@ -357,7 +357,7 @@ mod tests {
|
||||
|
||||
use crate::{
|
||||
tenant::{
|
||||
harness::{TenantHarness, TIMELINE_ID},
|
||||
harness::{TenantShardHarness, TIMELINE_ID},
|
||||
storage_layer::delta_layer::test::{produce_delta_layer, sort_delta},
|
||||
},
|
||||
DEFAULT_PG_VERSION,
|
||||
@@ -393,7 +393,7 @@ mod tests {
|
||||
use bytes::Bytes;
|
||||
use pageserver_api::value::Value;
|
||||
|
||||
let harness = TenantHarness::create("merge_iterator_merge_in_between")
|
||||
let harness = TenantShardHarness::create("merge_iterator_merge_in_between")
|
||||
.await
|
||||
.unwrap();
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
@@ -458,7 +458,7 @@ mod tests {
|
||||
use bytes::Bytes;
|
||||
use pageserver_api::value::Value;
|
||||
|
||||
let harness = TenantHarness::create("merge_iterator_delta_merge")
|
||||
let harness = TenantShardHarness::create("merge_iterator_delta_merge")
|
||||
.await
|
||||
.unwrap();
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
@@ -535,7 +535,7 @@ mod tests {
|
||||
use bytes::Bytes;
|
||||
use pageserver_api::value::Value;
|
||||
|
||||
let harness = TenantHarness::create("merge_iterator_delta_image_mixed_merge")
|
||||
let harness = TenantShardHarness::create("merge_iterator_delta_image_mixed_merge")
|
||||
.await
|
||||
.unwrap();
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
|
||||
@@ -12,7 +12,7 @@ use crate::task_mgr;
|
||||
use crate::task_mgr::{TaskKind, BACKGROUND_RUNTIME};
|
||||
use crate::tenant::throttle::Stats;
|
||||
use crate::tenant::timeline::CompactionError;
|
||||
use crate::tenant::{Tenant, TenantState};
|
||||
use crate::tenant::{TenantShard, TenantState};
|
||||
use rand::Rng;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
@@ -67,9 +67,10 @@ pub(crate) async fn concurrent_background_tasks_rate_limit_permit(
|
||||
) -> tokio::sync::SemaphorePermit<'static> {
|
||||
let _guard = crate::metrics::BACKGROUND_LOOP_SEMAPHORE.measure_acquisition(loop_kind);
|
||||
|
||||
if loop_kind == BackgroundLoopKind::InitialLogicalSizeCalculation {
|
||||
pausable_failpoint!("initial-size-calculation-permit-pause");
|
||||
}
|
||||
pausable_failpoint!(
|
||||
"initial-size-calculation-permit-pause",
|
||||
loop_kind == BackgroundLoopKind::InitialLogicalSizeCalculation
|
||||
);
|
||||
|
||||
// TODO: assert that we run on BACKGROUND_RUNTIME; requires tokio_unstable Handle::id();
|
||||
match CONCURRENT_BACKGROUND_TASKS.acquire().await {
|
||||
@@ -80,7 +81,7 @@ pub(crate) async fn concurrent_background_tasks_rate_limit_permit(
|
||||
|
||||
/// Start per tenant background loops: compaction and gc.
|
||||
pub fn start_background_loops(
|
||||
tenant: &Arc<Tenant>,
|
||||
tenant: &Arc<TenantShard>,
|
||||
background_jobs_can_start: Option<&completion::Barrier>,
|
||||
) {
|
||||
let tenant_shard_id = tenant.tenant_shard_id;
|
||||
@@ -157,7 +158,7 @@ pub fn start_background_loops(
|
||||
///
|
||||
/// Compaction task's main loop
|
||||
///
|
||||
async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
||||
async fn compaction_loop(tenant: Arc<TenantShard>, cancel: CancellationToken) {
|
||||
const MAX_BACKOFF_SECS: f64 = 300.0;
|
||||
// How many errors we have seen consequtively
|
||||
let mut error_run_count = 0;
|
||||
@@ -317,7 +318,7 @@ fn log_compaction_error(
|
||||
///
|
||||
/// GC task's main loop
|
||||
///
|
||||
async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
||||
async fn gc_loop(tenant: Arc<TenantShard>, cancel: CancellationToken) {
|
||||
const MAX_BACKOFF_SECS: f64 = 300.0;
|
||||
// How many errors we have seen consequtively
|
||||
let mut error_run_count = 0;
|
||||
@@ -417,7 +418,7 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
||||
TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc();
|
||||
}
|
||||
|
||||
async fn ingest_housekeeping_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
||||
async fn ingest_housekeeping_loop(tenant: Arc<TenantShard>, cancel: CancellationToken) {
|
||||
TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
|
||||
async {
|
||||
let mut last_throttle_flag_reset_at = Instant::now();
|
||||
@@ -495,7 +496,7 @@ async fn ingest_housekeeping_loop(tenant: Arc<Tenant>, cancel: CancellationToken
|
||||
TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc();
|
||||
}
|
||||
|
||||
async fn wait_for_active_tenant(tenant: &Arc<Tenant>) -> ControlFlow<()> {
|
||||
async fn wait_for_active_tenant(tenant: &Arc<TenantShard>) -> ControlFlow<()> {
|
||||
// if the tenant has a proper status already, no need to wait for anything
|
||||
if tenant.current_state() == TenantState::Active {
|
||||
ControlFlow::Continue(())
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -42,8 +42,8 @@ use crate::tenant::storage_layer::merge_iterator::MergeIterator;
|
||||
use crate::tenant::storage_layer::{
|
||||
AsLayerDesc, PersistentLayerDesc, PersistentLayerKey, ValueReconstructState,
|
||||
};
|
||||
use crate::tenant::timeline::ImageLayerCreationOutcome;
|
||||
use crate::tenant::timeline::{drop_rlock, DeltaLayerWriter, ImageLayerWriter};
|
||||
use crate::tenant::timeline::{ImageLayerCreationOutcome, IoConcurrency};
|
||||
use crate::tenant::timeline::{Layer, ResidentLayer};
|
||||
use crate::tenant::{gc_block, DeltaLayer, MaybeOffloaded};
|
||||
use crate::virtual_file::{MaybeFatalIo, VirtualFile};
|
||||
@@ -436,24 +436,12 @@ impl KeyHistoryRetention {
|
||||
if dry_run {
|
||||
return true;
|
||||
}
|
||||
if key.key_range.end == Key::MAX {
|
||||
warn!(
|
||||
key=%key,
|
||||
"discard layer due to layer key range being Key::MAX, which should not happen",
|
||||
);
|
||||
if cfg!(feature = "testing") {
|
||||
panic!("Key::MAX should not be part of the key space");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
let layer_generation;
|
||||
{
|
||||
let guard = tline.layers.read().await;
|
||||
if !guard.contains_key(key) {
|
||||
return false;
|
||||
}
|
||||
layer_generation = guard.get_from_key(key).metadata().generation;
|
||||
let guard = tline.layers.read().await;
|
||||
if !guard.contains_key(key) {
|
||||
return false;
|
||||
}
|
||||
let layer_generation = guard.get_from_key(key).metadata().generation;
|
||||
drop(guard);
|
||||
if layer_generation == tline.generation {
|
||||
info!(
|
||||
key=%key,
|
||||
@@ -2150,11 +2138,6 @@ impl Timeline {
|
||||
self.get_gc_compaction_watermark()
|
||||
};
|
||||
|
||||
if compact_below_lsn == Lsn::INVALID {
|
||||
tracing::warn!("no layers to compact with gc: gc_cutoff not generated yet, skipping gc bottom-most compaction");
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
// Split compaction job to about 4GB each
|
||||
const GC_COMPACT_MAX_SIZE_MB: u64 = 4 * 1024;
|
||||
let sub_compaction_max_job_size_mb =
|
||||
@@ -2163,7 +2146,12 @@ impl Timeline {
|
||||
let mut compact_jobs = Vec::new();
|
||||
// For now, we simply use the key partitioning information; we should do a more fine-grained partitioning
|
||||
// by estimating the amount of files read for a compaction job. We should also partition on LSN.
|
||||
let ((dense_ks, sparse_ks), _) = self.partitioning.read().as_ref().clone();
|
||||
let ((dense_ks, sparse_ks), _) = {
|
||||
let Ok(partition) = self.partitioning.try_lock() else {
|
||||
bail!("failed to acquire partition lock during gc-compaction");
|
||||
};
|
||||
partition.clone()
|
||||
};
|
||||
// Truncate the key range to be within user specified compaction range.
|
||||
fn truncate_to(
|
||||
source_start: &Key,
|
||||
@@ -2229,12 +2217,6 @@ impl Timeline {
|
||||
} else {
|
||||
end
|
||||
};
|
||||
let end = if ranges_num == idx + 1 {
|
||||
// extend the compaction range to the end of the key range if it's the last partition
|
||||
end.max(job.compact_key_range.end)
|
||||
} else {
|
||||
end
|
||||
};
|
||||
info!(
|
||||
"splitting compaction job: {}..{}, estimated_size={}",
|
||||
start, end, total_size
|
||||
@@ -2355,11 +2337,6 @@ impl Timeline {
|
||||
// each of the retain_lsn. Therefore, if the user-provided `compact_lsn_range.end` is larger than the real gc cutoff, we will use
|
||||
// the real cutoff.
|
||||
let mut gc_cutoff = if compact_lsn_range.end == Lsn::MAX {
|
||||
if real_gc_cutoff == Lsn::INVALID {
|
||||
// If the gc_cutoff is not generated yet, we should not compact anything.
|
||||
tracing::warn!("no layers to compact with gc: gc_cutoff not generated yet, skipping gc bottom-most compaction");
|
||||
return Ok(());
|
||||
}
|
||||
real_gc_cutoff
|
||||
} else {
|
||||
compact_lsn_range.end
|
||||
@@ -2770,15 +2747,7 @@ impl Timeline {
|
||||
|
||||
let produced_image_layers = if let Some(writer) = image_layer_writer {
|
||||
if !dry_run {
|
||||
let mut end_key = job_desc.compaction_key_range.end;
|
||||
if end_key == Key::MAX {
|
||||
// There's a potential bug to-be-resolved when end_key is Key::MAX, so we need to subtract 1 to workaround it.
|
||||
// Note that we never write Key::MAX into the image layer, as this is not part of the key space.
|
||||
end_key = Key {
|
||||
field6: u32::MAX - 1,
|
||||
..Key::MAX
|
||||
};
|
||||
}
|
||||
let end_key = job_desc.compaction_key_range.end;
|
||||
writer
|
||||
.finish_with_discard_fn(self, ctx, end_key, discard)
|
||||
.await?
|
||||
@@ -2899,7 +2868,7 @@ impl Timeline {
|
||||
"produced {} delta layers and {} image layers, {} layers are kept",
|
||||
produced_delta_layers_len,
|
||||
produced_image_layers_len,
|
||||
keep_layers.len()
|
||||
layer_selection.len()
|
||||
);
|
||||
|
||||
// Step 3: Place back to the layer map.
|
||||
@@ -2945,28 +2914,8 @@ impl Timeline {
|
||||
// be batched into `schedule_compaction_update`.
|
||||
let disk_consistent_lsn = self.disk_consistent_lsn.load();
|
||||
self.schedule_uploads(disk_consistent_lsn, None)?;
|
||||
// If a layer gets rewritten throughout gc-compaction, we need to keep that layer only in `compact_to` instead
|
||||
// of `compact_from`.
|
||||
let compact_from = {
|
||||
let mut compact_from = Vec::new();
|
||||
let mut compact_to_set = HashMap::new();
|
||||
for layer in &compact_to {
|
||||
compact_to_set.insert(layer.layer_desc().key(), layer);
|
||||
}
|
||||
for layer in &layer_selection {
|
||||
if let Some(to) = compact_to_set.get(&layer.layer_desc().key()) {
|
||||
tracing::info!(
|
||||
"skipping delete {} because found same layer key at different generation {}",
|
||||
layer, to
|
||||
);
|
||||
} else {
|
||||
compact_from.push(layer.clone());
|
||||
}
|
||||
}
|
||||
compact_from
|
||||
};
|
||||
self.remote_client
|
||||
.schedule_compaction_update(&compact_from, &compact_to)?;
|
||||
.schedule_compaction_update(&layer_selection, &compact_to)?;
|
||||
|
||||
drop(gc_lock);
|
||||
|
||||
@@ -3226,7 +3175,6 @@ impl TimelineAdaptor {
|
||||
ctx,
|
||||
key_range.clone(),
|
||||
start,
|
||||
IoConcurrency::sequential(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
|
||||
@@ -16,8 +16,8 @@ use crate::{
|
||||
tenant::{
|
||||
metadata::TimelineMetadata,
|
||||
remote_timeline_client::{PersistIndexPartWithDeletedFlagError, RemoteTimelineClient},
|
||||
CreateTimelineCause, DeleteTimelineError, MaybeDeletedIndexPart, Tenant,
|
||||
TenantManifestError, TimelineOrOffloaded,
|
||||
CreateTimelineCause, DeleteTimelineError, MaybeDeletedIndexPart, TenantManifestError,
|
||||
TenantShard, TimelineOrOffloaded,
|
||||
},
|
||||
virtual_file::MaybeFatalIo,
|
||||
};
|
||||
@@ -112,9 +112,9 @@ pub(super) async fn delete_local_timeline_directory(
|
||||
}
|
||||
|
||||
/// It is important that this gets called when DeletionGuard is being held.
|
||||
/// For more context see comments in [`make_timeline_delete_guard`]
|
||||
/// For more context see comments in [`DeleteTimelineFlow::prepare`]
|
||||
async fn remove_maybe_offloaded_timeline_from_tenant(
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
timeline: &TimelineOrOffloaded,
|
||||
_: &DeletionGuard, // using it as a witness
|
||||
) -> anyhow::Result<()> {
|
||||
@@ -188,13 +188,15 @@ impl DeleteTimelineFlow {
|
||||
// error out if some of the shutdown tasks have already been completed!
|
||||
#[instrument(skip_all)]
|
||||
pub async fn run(
|
||||
tenant: &Arc<Tenant>,
|
||||
tenant: &Arc<TenantShard>,
|
||||
timeline_id: TimelineId,
|
||||
) -> Result<(), DeleteTimelineError> {
|
||||
super::debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
|
||||
let allow_offloaded_children = false;
|
||||
let set_stopping = true;
|
||||
let (timeline, mut guard) =
|
||||
make_timeline_delete_guard(tenant, timeline_id, TimelineDeleteGuardKind::Delete)?;
|
||||
Self::prepare(tenant, timeline_id, allow_offloaded_children, set_stopping)?;
|
||||
|
||||
guard.mark_in_progress()?;
|
||||
|
||||
@@ -284,7 +286,7 @@ impl DeleteTimelineFlow {
|
||||
/// Shortcut to create Timeline in stopping state and spawn deletion task.
|
||||
#[instrument(skip_all, fields(%timeline_id))]
|
||||
pub(crate) async fn resume_deletion(
|
||||
tenant: Arc<Tenant>,
|
||||
tenant: Arc<TenantShard>,
|
||||
timeline_id: TimelineId,
|
||||
local_metadata: &TimelineMetadata,
|
||||
remote_client: RemoteTimelineClient,
|
||||
@@ -331,10 +333,79 @@ impl DeleteTimelineFlow {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(super) fn prepare(
|
||||
tenant: &TenantShard,
|
||||
timeline_id: TimelineId,
|
||||
allow_offloaded_children: bool,
|
||||
set_stopping: bool,
|
||||
) -> Result<(TimelineOrOffloaded, DeletionGuard), DeleteTimelineError> {
|
||||
// Note the interaction between this guard and deletion guard.
|
||||
// Here we attempt to lock deletion guard when we're holding a lock on timelines.
|
||||
// This is important because when you take into account `remove_timeline_from_tenant`
|
||||
// we remove timeline from memory when we still hold the deletion guard.
|
||||
// So here when timeline deletion is finished timeline wont be present in timelines map at all
|
||||
// which makes the following sequence impossible:
|
||||
// T1: get preempted right before the try_lock on `Timeline::delete_progress`
|
||||
// T2: do a full deletion, acquire and drop `Timeline::delete_progress`
|
||||
// T1: acquire deletion lock, do another `DeleteTimelineFlow::run`
|
||||
// For more context see this discussion: `https://github.com/neondatabase/neon/pull/4552#discussion_r1253437346`
|
||||
let timelines = tenant.timelines.lock().unwrap();
|
||||
let timelines_offloaded = tenant.timelines_offloaded.lock().unwrap();
|
||||
|
||||
let timeline = match timelines.get(&timeline_id) {
|
||||
Some(t) => TimelineOrOffloaded::Timeline(Arc::clone(t)),
|
||||
None => match timelines_offloaded.get(&timeline_id) {
|
||||
Some(t) => TimelineOrOffloaded::Offloaded(Arc::clone(t)),
|
||||
None => return Err(DeleteTimelineError::NotFound),
|
||||
},
|
||||
};
|
||||
|
||||
// Ensure that there are no child timelines, because we are about to remove files,
|
||||
// which will break child branches
|
||||
let mut children = Vec::new();
|
||||
if !allow_offloaded_children {
|
||||
children.extend(timelines_offloaded.iter().filter_map(|(id, entry)| {
|
||||
(entry.ancestor_timeline_id == Some(timeline_id)).then_some(*id)
|
||||
}));
|
||||
}
|
||||
children.extend(timelines.iter().filter_map(|(id, entry)| {
|
||||
(entry.get_ancestor_timeline_id() == Some(timeline_id)).then_some(*id)
|
||||
}));
|
||||
|
||||
if !children.is_empty() {
|
||||
return Err(DeleteTimelineError::HasChildren(children));
|
||||
}
|
||||
|
||||
// Note that using try_lock here is important to avoid a deadlock.
|
||||
// Here we take lock on timelines and then the deletion guard.
|
||||
// At the end of the operation we're holding the guard and need to lock timelines map
|
||||
// to remove the timeline from it.
|
||||
// Always if you have two locks that are taken in different order this can result in a deadlock.
|
||||
|
||||
let delete_progress = Arc::clone(timeline.delete_progress());
|
||||
let delete_lock_guard = match delete_progress.try_lock_owned() {
|
||||
Ok(guard) => DeletionGuard(guard),
|
||||
Err(_) => {
|
||||
// Unfortunately if lock fails arc is consumed.
|
||||
return Err(DeleteTimelineError::AlreadyInProgress(Arc::clone(
|
||||
timeline.delete_progress(),
|
||||
)));
|
||||
}
|
||||
};
|
||||
|
||||
if set_stopping {
|
||||
if let TimelineOrOffloaded::Timeline(timeline) = &timeline {
|
||||
timeline.set_state(TimelineState::Stopping);
|
||||
}
|
||||
}
|
||||
|
||||
Ok((timeline, delete_lock_guard))
|
||||
}
|
||||
|
||||
fn schedule_background(
|
||||
guard: DeletionGuard,
|
||||
conf: &'static PageServerConf,
|
||||
tenant: Arc<Tenant>,
|
||||
tenant: Arc<TenantShard>,
|
||||
timeline: TimelineOrOffloaded,
|
||||
remote_client: Arc<RemoteTimelineClient>,
|
||||
) {
|
||||
@@ -368,7 +439,7 @@ impl DeleteTimelineFlow {
|
||||
async fn background(
|
||||
mut guard: DeletionGuard,
|
||||
conf: &PageServerConf,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
timeline: &TimelineOrOffloaded,
|
||||
remote_client: Arc<RemoteTimelineClient>,
|
||||
) -> Result<(), DeleteTimelineError> {
|
||||
@@ -412,80 +483,6 @@ impl DeleteTimelineFlow {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, PartialEq, Eq)]
|
||||
pub(super) enum TimelineDeleteGuardKind {
|
||||
Offload,
|
||||
Delete,
|
||||
}
|
||||
|
||||
pub(super) fn make_timeline_delete_guard(
|
||||
tenant: &Tenant,
|
||||
timeline_id: TimelineId,
|
||||
guard_kind: TimelineDeleteGuardKind,
|
||||
) -> Result<(TimelineOrOffloaded, DeletionGuard), DeleteTimelineError> {
|
||||
// Note the interaction between this guard and deletion guard.
|
||||
// Here we attempt to lock deletion guard when we're holding a lock on timelines.
|
||||
// This is important because when you take into account `remove_timeline_from_tenant`
|
||||
// we remove timeline from memory when we still hold the deletion guard.
|
||||
// So here when timeline deletion is finished timeline wont be present in timelines map at all
|
||||
// which makes the following sequence impossible:
|
||||
// T1: get preempted right before the try_lock on `Timeline::delete_progress`
|
||||
// T2: do a full deletion, acquire and drop `Timeline::delete_progress`
|
||||
// T1: acquire deletion lock, do another `DeleteTimelineFlow::run`
|
||||
// For more context see this discussion: `https://github.com/neondatabase/neon/pull/4552#discussion_r1253437346`
|
||||
let timelines = tenant.timelines.lock().unwrap();
|
||||
let timelines_offloaded = tenant.timelines_offloaded.lock().unwrap();
|
||||
|
||||
let timeline = match timelines.get(&timeline_id) {
|
||||
Some(t) => TimelineOrOffloaded::Timeline(Arc::clone(t)),
|
||||
None => match timelines_offloaded.get(&timeline_id) {
|
||||
Some(t) => TimelineOrOffloaded::Offloaded(Arc::clone(t)),
|
||||
None => return Err(DeleteTimelineError::NotFound),
|
||||
},
|
||||
};
|
||||
|
||||
// Ensure that there are no child timelines, because we are about to remove files,
|
||||
// which will break child branches
|
||||
let mut children = Vec::new();
|
||||
if guard_kind == TimelineDeleteGuardKind::Delete {
|
||||
children.extend(timelines_offloaded.iter().filter_map(|(id, entry)| {
|
||||
(entry.ancestor_timeline_id == Some(timeline_id)).then_some(*id)
|
||||
}));
|
||||
}
|
||||
children.extend(timelines.iter().filter_map(|(id, entry)| {
|
||||
(entry.get_ancestor_timeline_id() == Some(timeline_id)).then_some(*id)
|
||||
}));
|
||||
|
||||
if !children.is_empty() {
|
||||
return Err(DeleteTimelineError::HasChildren(children));
|
||||
}
|
||||
|
||||
// Note that using try_lock here is important to avoid a deadlock.
|
||||
// Here we take lock on timelines and then the deletion guard.
|
||||
// At the end of the operation we're holding the guard and need to lock timelines map
|
||||
// to remove the timeline from it.
|
||||
// Always if you have two locks that are taken in different order this can result in a deadlock.
|
||||
|
||||
let delete_progress = Arc::clone(timeline.delete_progress());
|
||||
let delete_lock_guard = match delete_progress.try_lock_owned() {
|
||||
Ok(guard) => DeletionGuard(guard),
|
||||
Err(_) => {
|
||||
// Unfortunately if lock fails arc is consumed.
|
||||
return Err(DeleteTimelineError::AlreadyInProgress(Arc::clone(
|
||||
timeline.delete_progress(),
|
||||
)));
|
||||
}
|
||||
};
|
||||
|
||||
if guard_kind == TimelineDeleteGuardKind::Delete {
|
||||
if let TimelineOrOffloaded::Timeline(timeline) = &timeline {
|
||||
timeline.set_state(TimelineState::Stopping);
|
||||
}
|
||||
}
|
||||
|
||||
Ok((timeline, delete_lock_guard))
|
||||
}
|
||||
|
||||
pub(super) struct DeletionGuard(OwnedMutexGuard<DeleteTimelineFlow>);
|
||||
|
||||
impl Deref for DeletionGuard {
|
||||
|
||||
@@ -7,7 +7,7 @@ use crate::{
|
||||
tenant::{
|
||||
remote_timeline_client::index::GcBlockingReason::DetachAncestor,
|
||||
storage_layer::{AsLayerDesc as _, DeltaLayerWriter, Layer, ResidentLayer},
|
||||
Tenant,
|
||||
TenantShard,
|
||||
},
|
||||
virtual_file::{MaybeFatalIo, VirtualFile},
|
||||
};
|
||||
@@ -159,7 +159,7 @@ impl Attempt {
|
||||
/// See [`Timeline::prepare_to_detach_from_ancestor`]
|
||||
pub(super) async fn prepare(
|
||||
detached: &Arc<Timeline>,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
options: Options,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Progress, Error> {
|
||||
@@ -410,7 +410,7 @@ pub(super) async fn prepare(
|
||||
Ok(Progress::Prepared(attempt, prepared))
|
||||
}
|
||||
|
||||
async fn start_new_attempt(detached: &Timeline, tenant: &Tenant) -> Result<Attempt, Error> {
|
||||
async fn start_new_attempt(detached: &Timeline, tenant: &TenantShard) -> Result<Attempt, Error> {
|
||||
let attempt = obtain_exclusive_attempt(detached, tenant)?;
|
||||
|
||||
// insert the block in the index_part.json, if not already there.
|
||||
@@ -426,13 +426,16 @@ async fn start_new_attempt(detached: &Timeline, tenant: &Tenant) -> Result<Attem
|
||||
Ok(attempt)
|
||||
}
|
||||
|
||||
async fn continue_with_blocked_gc(detached: &Timeline, tenant: &Tenant) -> Result<Attempt, Error> {
|
||||
async fn continue_with_blocked_gc(
|
||||
detached: &Timeline,
|
||||
tenant: &TenantShard,
|
||||
) -> Result<Attempt, Error> {
|
||||
// FIXME: it would be nice to confirm that there is an in-memory version, since we've just
|
||||
// verified there is a persistent one?
|
||||
obtain_exclusive_attempt(detached, tenant)
|
||||
}
|
||||
|
||||
fn obtain_exclusive_attempt(detached: &Timeline, tenant: &Tenant) -> Result<Attempt, Error> {
|
||||
fn obtain_exclusive_attempt(detached: &Timeline, tenant: &TenantShard) -> Result<Attempt, Error> {
|
||||
use Error::{OtherTimelineDetachOngoing, ShuttingDown};
|
||||
|
||||
// ensure we are the only active attempt for this tenant
|
||||
@@ -460,7 +463,7 @@ fn obtain_exclusive_attempt(detached: &Timeline, tenant: &Tenant) -> Result<Atte
|
||||
|
||||
fn reparented_direct_children(
|
||||
detached: &Arc<Timeline>,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
) -> Result<HashSet<TimelineId>, Error> {
|
||||
let mut all_direct_children = tenant
|
||||
.timelines
|
||||
@@ -698,7 +701,7 @@ impl DetachingAndReparenting {
|
||||
/// See [`Timeline::detach_from_ancestor_and_reparent`].
|
||||
pub(super) async fn detach_and_reparent(
|
||||
detached: &Arc<Timeline>,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
prepared: PreparedTimelineDetach,
|
||||
_ctx: &RequestContext,
|
||||
) -> Result<DetachingAndReparenting, Error> {
|
||||
@@ -901,7 +904,7 @@ pub(super) async fn detach_and_reparent(
|
||||
|
||||
pub(super) async fn complete(
|
||||
detached: &Arc<Timeline>,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
mut attempt: Attempt,
|
||||
_ctx: &RequestContext,
|
||||
) -> Result<(), Error> {
|
||||
@@ -970,7 +973,7 @@ where
|
||||
}
|
||||
|
||||
fn check_no_archived_children_of_ancestor(
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
detached: &Arc<Timeline>,
|
||||
ancestor: &Arc<Timeline>,
|
||||
ancestor_lsn: Lsn,
|
||||
|
||||
@@ -31,7 +31,8 @@ use crate::{
|
||||
task_mgr::{self, TaskKind, BACKGROUND_RUNTIME},
|
||||
tenant::{
|
||||
size::CalculateSyntheticSizeError, storage_layer::LayerVisibilityHint,
|
||||
tasks::BackgroundLoopKind, timeline::EvictionError, LogicalSizeCalculationCause, Tenant,
|
||||
tasks::BackgroundLoopKind, timeline::EvictionError, LogicalSizeCalculationCause,
|
||||
TenantShard,
|
||||
},
|
||||
};
|
||||
|
||||
@@ -52,7 +53,7 @@ pub struct EvictionTaskTenantState {
|
||||
impl Timeline {
|
||||
pub(super) fn launch_eviction_task(
|
||||
self: &Arc<Self>,
|
||||
parent: Arc<Tenant>,
|
||||
parent: Arc<TenantShard>,
|
||||
background_tasks_can_start: Option<&completion::Barrier>,
|
||||
) {
|
||||
let self_clone = Arc::clone(self);
|
||||
@@ -79,7 +80,7 @@ impl Timeline {
|
||||
}
|
||||
|
||||
#[instrument(skip_all, fields(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id))]
|
||||
async fn eviction_task(self: Arc<Self>, tenant: Arc<Tenant>) {
|
||||
async fn eviction_task(self: Arc<Self>, tenant: Arc<TenantShard>) {
|
||||
use crate::tenant::tasks::random_init_delay;
|
||||
|
||||
// acquire the gate guard only once within a useful span
|
||||
@@ -123,7 +124,7 @@ impl Timeline {
|
||||
#[instrument(skip_all, fields(policy_kind = policy.discriminant_str()))]
|
||||
async fn eviction_iteration(
|
||||
self: &Arc<Self>,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
policy: &EvictionPolicy,
|
||||
cancel: &CancellationToken,
|
||||
gate: &GateGuard,
|
||||
@@ -180,7 +181,7 @@ impl Timeline {
|
||||
|
||||
async fn eviction_iteration_threshold(
|
||||
self: &Arc<Self>,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
p: &EvictionPolicyLayerAccessThreshold,
|
||||
cancel: &CancellationToken,
|
||||
gate: &GateGuard,
|
||||
@@ -314,7 +315,7 @@ impl Timeline {
|
||||
/// disk usage based eviction task.
|
||||
async fn imitiate_only(
|
||||
self: &Arc<Self>,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
p: &EvictionPolicyLayerAccessThreshold,
|
||||
cancel: &CancellationToken,
|
||||
gate: &GateGuard,
|
||||
@@ -370,7 +371,7 @@ impl Timeline {
|
||||
#[instrument(skip_all)]
|
||||
async fn imitate_layer_accesses(
|
||||
&self,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
p: &EvictionPolicyLayerAccessThreshold,
|
||||
cancel: &CancellationToken,
|
||||
gate: &GateGuard,
|
||||
@@ -506,7 +507,7 @@ impl Timeline {
|
||||
#[instrument(skip_all)]
|
||||
async fn imitate_synthetic_size_calculation_worker(
|
||||
&self,
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
) {
|
||||
|
||||
@@ -32,151 +32,54 @@
|
||||
//!
|
||||
//! # Design
|
||||
//!
|
||||
//! ## Data Structures
|
||||
//!
|
||||
//! There are three user-facing data structures:
|
||||
//! - `PerTimelineState`: a struct embedded into each Timeline struct. Lifetime == Timeline lifetime.
|
||||
//! - `Cache`: a struct private to each connection handler; Lifetime == connection lifetime.
|
||||
//! - `Handle`: a smart pointer that holds the Timeline gate open and derefs to `&Timeline`.
|
||||
//! - `WeakHandle`: downgrade of a `Handle` that does not keep the gate open, but allows
|
||||
//! trying to ugprade back to a `Handle`, guaranteeing it's the same `Timeline` *object*.
|
||||
//! Lifetime: for a single request dispatch on the Timeline (i.e., one getpage request)
|
||||
//!
|
||||
//! Internally, there is 0 or 1 `HandleInner` per `(Cache,Timeline)`.
|
||||
//! Since Cache:Connection is 1:1, there is 0 or 1 `HandleInner` per `(Connection,Timeline)`.
|
||||
//! The `Handle` is just a wrapper around an `Arc<HandleInner>`.
|
||||
//!
|
||||
//! The `HandleInner` is allocated as a `Arc<Mutex<HandleInner>>` and
|
||||
//! referenced weakly and strongly from various places which we are now illustrating.
|
||||
//! For brevity, we will omit the `Arc<Mutex<>>` part in the following and instead
|
||||
//! use `strong ref` and `weak ref` when referring to the `Arc<Mutex<HandleInner>>`
|
||||
//! or `Weak<Mutex<HandleInner>>`, respectively.
|
||||
//!
|
||||
//! - The `Handle` is a strong ref.
|
||||
//! - The `WeakHandle` is a weak ref.
|
||||
//! - The `PerTimelineState` contains a `HashMap<CacheId, strong ref>`.
|
||||
//! - The `Cache` is a `HashMap<unique identifier for the shard, weak ref>`.
|
||||
//!
|
||||
//! Lifetimes:
|
||||
//! - `WeakHandle` and `Handle`: single pagestream request.
|
||||
//! - `Cache`: single page service connection.
|
||||
//! - `PerTimelineState`: lifetime of the Timeline object (i.e., i.e., till `Timeline::shutdown`).
|
||||
//!
|
||||
//! ## Request Handling Flow (= filling and using the `Cache``)
|
||||
//! There is one long-lived `Arc<HandleInner>`, which is stored in the `PerTimelineState`.
|
||||
//! The `Cache` stores a `Weak<HandleInner>` for each cached Timeline.
|
||||
//!
|
||||
//! To dispatch a request, the page service connection calls `Cache::get`.
|
||||
//!
|
||||
//! A cache miss means we consult the tenant manager for shard routing,
|
||||
//! resulting in an `Arc<Timeline>`. We enter its gate _once_ and store it in the the
|
||||
//! `Arc<Mutex<HandleInner>>>`. A weak ref is stored in the `Cache`
|
||||
//! and a strong ref in the `PerTimelineState`.
|
||||
//! A strong ref is returned wrapped in a `Handle`.
|
||||
//! resulting in an `Arc<Timeline>`. We enter its gate _once_ and construct an
|
||||
//! `Arc<HandleInner>`. We store a `Weak<HandleInner>` in the cache
|
||||
//! and the `Arc<HandleInner>` in the `PerTimelineState`.
|
||||
//!
|
||||
//! For subsequent requests, `Cache::get` will perform a "fast path" shard routing
|
||||
//! and find the weak ref in the cache.
|
||||
//! We upgrade the weak ref to a strong ref and return it wrapped in a `Handle`.
|
||||
//! and find the `Weak<HandleInner>` in the cache.
|
||||
//! We upgrade the `Weak<HandleInner>` to an `Arc<HandleInner>` and wrap it in the user-facing `Handle` type.
|
||||
//!
|
||||
//! The pagestream processing is pipelined and involves a batching step.
|
||||
//! While a request is batching, the `Handle` is downgraded to a `WeakHandle`.
|
||||
//! When the batch is ready to be executed, the `WeakHandle` is upgraded back to a `Handle`
|
||||
//! and the request handler dispatches the request to the right `<Handle as Deref<Target = Timeline>>::$request_method`.
|
||||
//! The request handler dispatches the request to the right `<Handle as Deref<Target = Timeline>>::$request_method`.
|
||||
//! It then drops the `Handle`, which drops the `Arc<HandleInner>`.
|
||||
//!
|
||||
//! # Performance
|
||||
//! # Memory Management / How The Reference Cycle Is Broken
|
||||
//!
|
||||
//! Remember from the introductory section:
|
||||
//! The attentive reader may have noticed the strong reference cycle
|
||||
//! from `Arc<HandleInner>` to `PerTimelineState` to `Arc<Timeline>`.
|
||||
//!
|
||||
//! > However, we want to avoid the overhead of entering the gate for every
|
||||
//! > method invocation.
|
||||
//!
|
||||
//! Why do we want to avoid that?
|
||||
//! Because the gate is a shared location in memory and entering it involves
|
||||
//! bumping refcounts, which leads to cache contention if done frequently
|
||||
//! from multiple cores in parallel.
|
||||
//!
|
||||
//! So, we only acquire the `GateGuard` once on `Cache` miss, and wrap it in an `Arc`.
|
||||
//! That `Arc` is private to the `HandleInner` and hence to the connection.
|
||||
//! (Review the "Data Structures" section if that is unclear to you.)
|
||||
//!
|
||||
//! A `WeakHandle` is a weak ref to the `HandleInner`.
|
||||
//! When upgrading a `WeakHandle`, we upgrade to a strong ref to the `HandleInner` and
|
||||
//! further acquire an additional strong ref to the `Arc<GateGuard>` inside it.
|
||||
//! Again, this manipulation of ref counts is is cheap because `Arc` is private to the connection.
|
||||
//!
|
||||
//! When downgrading a `Handle` to a `WeakHandle`, we drop the `Arc<GateGuard>`.
|
||||
//! Again, this is cheap because the `Arc` is private to the connection.
|
||||
//!
|
||||
//! In addition to the GateGuard, we need to provide `Deref<Target=Timeline>` impl.
|
||||
//! For this, both `Handle` need infallible access to an `Arc<Timeline>`.
|
||||
//! We could clone the `Arc<Timeline>` when upgrading a `WeakHandle`, but that would cause contention
|
||||
//! on the shared memory location that trakcs the refcount of the `Arc<Timeline>`.
|
||||
//! Instead, we wrap the `Arc<Timeline>` into another `Arc`.
|
||||
//! so that we can clone it cheaply when upgrading a `WeakHandle`.
|
||||
//!
|
||||
//! # Shutdown
|
||||
//!
|
||||
//! The attentive reader may have noticed the following reference cycle around the `Arc<Timeline>`:
|
||||
//!
|
||||
//! ```text
|
||||
//! Timeline --owns--> PerTimelineState --strong--> HandleInner --strong--> Timeline
|
||||
//! ```
|
||||
//!
|
||||
//! Further, there is this cycle:
|
||||
//!
|
||||
//! ```text
|
||||
//! Timeline --owns--> PerTimelineState --strong--> HandleInner --strong--> GateGuard --keepalive--> Timeline
|
||||
//! ```
|
||||
//!
|
||||
//! The former cycle is a memory leak if not broken.
|
||||
//! The latter cycle further prevents the Timeline from shutting down
|
||||
//! because we certainly won't drop the Timeline while the GateGuard is alive.
|
||||
//! Preventing shutdown is the whole point of this handle/cache system,
|
||||
//! but when the Timeline needs to shut down, we need to break the cycle.
|
||||
//! This cycle is intentional: while it exists, the `Cache` can upgrade its
|
||||
//! `Weak<HandleInner>` to an `Arc<HandleInner>` in a single atomic operation.
|
||||
//!
|
||||
//! The cycle is broken by either
|
||||
//! - Timeline shutdown (=> `PerTimelineState::shutdown`)
|
||||
//! - Connection shutdown (=> dropping the `Cache`).
|
||||
//! - `PerTimelineState::shutdown` or
|
||||
//! - dropping the `Cache`.
|
||||
//!
|
||||
//! Both transition the `HandleInner` from [`HandleInner::KeepingTimelineGateOpen`] to
|
||||
//! [`HandleInner::ShutDown`], which drops the only long-lived strong ref to the
|
||||
//! `Arc<GateGuard>`.
|
||||
//!
|
||||
//! `PerTimelineState::shutdown` drops all the `HandleInners` it contains,
|
||||
//! thereby breaking the cycle.
|
||||
//! It also initiates draining of already existing `Handle`s by
|
||||
//! poisoning things so that no new `HandleInner`'s can be added
|
||||
//! to the `PerTimelineState`, which will make subsequent `Cache::get` fail.
|
||||
//!
|
||||
//! Concurrently existing / already upgraded `Handle`s will extend the
|
||||
//! lifetime of the `Arc<Mutex<HandleInner>>` and hence cycles.
|
||||
//! Concurrently existing `Handle`s will extend the existence of the cycle.
|
||||
//! However, since `Handle`s are short-lived and new `Handle`s are not
|
||||
//! handed out from `Cache::get` or `WeakHandle::upgrade` after
|
||||
//! `PerTimelineState::shutdown`, that extension of the cycle is bounded.
|
||||
//!
|
||||
//! Concurrently existing `WeakHandle`s will fail to `upgrade()`:
|
||||
//! while they will succeed in upgrading `Weak<Mutex<HandleInner>>`,
|
||||
//! they will find the inner in state `HandleInner::ShutDown` state where the
|
||||
//! `Arc<GateGuard>` and Timeline has already been dropped.
|
||||
//!
|
||||
//! Dropping the `Cache` undoes the registration of this `Cache`'s
|
||||
//! `HandleInner`s from all the `PerTimelineState`s, i.e., it
|
||||
//! removes the strong ref to each of its `HandleInner`s
|
||||
//! from all the `PerTimelineState`.
|
||||
//!
|
||||
//! # Locking Rules
|
||||
//!
|
||||
//! To prevent deadlocks we:
|
||||
//!
|
||||
//! 1. Only ever hold one of the locks at a time.
|
||||
//! 2. Don't add more than one Drop impl that locks on the
|
||||
//! cycles above.
|
||||
//!
|
||||
//! As per (2), that impl is in `Drop for Cache`.
|
||||
//! handed out after either `PerTimelineState::shutdown` or `Cache` drop,
|
||||
//! that extension of the cycle is bounded.
|
||||
//!
|
||||
//! # Fast Path for Shard Routing
|
||||
//!
|
||||
//! The `Cache` has a fast path for shard routing to avoid calling into
|
||||
//! the tenant manager for every request.
|
||||
//!
|
||||
//! The `Cache` maintains a hash map of `ShardTimelineId` to `WeakHandle`s.
|
||||
//! The `Cache` maintains a hash map of `ShardTimelineId` to `Weak<HandleInner>`.
|
||||
//!
|
||||
//! The current implementation uses the first entry in the hash map
|
||||
//! to determine the `ShardParameters` and derive the correct
|
||||
@@ -184,18 +87,18 @@
|
||||
//!
|
||||
//! It then looks up the hash map for that `ShardTimelineId := {ShardIndex,TimelineId}`.
|
||||
//!
|
||||
//! If the lookup is successful and the `WeakHandle` can be upgraded,
|
||||
//! If the lookup is successful and the `Weak<HandleInner>` can be upgraded,
|
||||
//! it's a hit.
|
||||
//!
|
||||
//! ## Cache invalidation
|
||||
//!
|
||||
//! The insight is that cache invalidation is sufficient and most efficiently if done lazily.
|
||||
//! The insight is that cache invalidation is sufficient and most efficiently done lazily.
|
||||
//! The only reasons why an entry in the cache can become stale are:
|
||||
//! 1. The `PerTimelineState` / Timeline is shutting down e.g. because the shard is
|
||||
//! being detached, timeline or shard deleted, or pageserver is shutting down.
|
||||
//! 2. We're doing a shard split and new traffic should be routed to the child shards.
|
||||
//!
|
||||
//! Regarding (1), we will eventually fail to upgrade the `WeakHandle` once the
|
||||
//! Regarding (1), we will eventually fail to upgrade the `Weak<HandleInner>` once the
|
||||
//! timeline has shut down, and when that happens, we remove the entry from the cache.
|
||||
//!
|
||||
//! Regarding (2), the insight is that it is toally fine to keep dispatching requests
|
||||
@@ -204,6 +107,8 @@
|
||||
|
||||
use std::collections::hash_map;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex;
|
||||
use std::sync::Weak;
|
||||
@@ -247,7 +152,7 @@ pub(crate) struct Cache<T: Types> {
|
||||
map: Map<T>,
|
||||
}
|
||||
|
||||
type Map<T> = HashMap<ShardTimelineId, WeakHandle<T>>;
|
||||
type Map<T> = HashMap<ShardTimelineId, Weak<HandleInner<T>>>;
|
||||
|
||||
impl<T: Types> Default for Cache<T> {
|
||||
fn default() -> Self {
|
||||
@@ -265,22 +170,12 @@ pub(crate) struct ShardTimelineId {
|
||||
}
|
||||
|
||||
/// See module-level comment.
|
||||
pub(crate) struct Handle<T: Types> {
|
||||
timeline: Arc<T::Timeline>,
|
||||
#[allow(dead_code)] // the field exists to keep the gate open
|
||||
gate_guard: Arc<utils::sync::gate::GateGuard>,
|
||||
inner: Arc<Mutex<HandleInner<T>>>,
|
||||
}
|
||||
pub(crate) struct WeakHandle<T: Types> {
|
||||
inner: Weak<Mutex<HandleInner<T>>>,
|
||||
}
|
||||
enum HandleInner<T: Types> {
|
||||
KeepingTimelineGateOpen {
|
||||
#[allow(dead_code)]
|
||||
gate_guard: Arc<utils::sync::gate::GateGuard>,
|
||||
timeline: Arc<T::Timeline>,
|
||||
},
|
||||
ShutDown,
|
||||
pub(crate) struct Handle<T: Types>(Arc<HandleInner<T>>);
|
||||
struct HandleInner<T: Types> {
|
||||
shut_down: AtomicBool,
|
||||
timeline: T::Timeline,
|
||||
// The timeline's gate held open.
|
||||
_gate_guard: utils::sync::gate::GateGuard,
|
||||
}
|
||||
|
||||
/// Embedded in each [`Types::Timeline`] as the anchor for the only long-lived strong ref to `HandleInner`.
|
||||
@@ -288,8 +183,7 @@ enum HandleInner<T: Types> {
|
||||
/// See module-level comment for details.
|
||||
pub struct PerTimelineState<T: Types> {
|
||||
// None = shutting down
|
||||
#[allow(clippy::type_complexity)]
|
||||
handles: Mutex<Option<HashMap<CacheId, Arc<Mutex<HandleInner<T>>>>>>,
|
||||
handles: Mutex<Option<HashMap<CacheId, Arc<HandleInner<T>>>>>,
|
||||
}
|
||||
|
||||
impl<T: Types> Default for PerTimelineState<T> {
|
||||
@@ -349,24 +243,49 @@ impl<T: Types> Cache<T> {
|
||||
shard_selector: ShardSelector,
|
||||
tenant_manager: &T::TenantManager,
|
||||
) -> Result<Handle<T>, GetError<T>> {
|
||||
// terminates because when every iteration we remove an element from the map
|
||||
let miss: ShardSelector = loop {
|
||||
// terminates because each iteration removes an element from the map
|
||||
loop {
|
||||
let handle = self
|
||||
.get_impl(timeline_id, shard_selector, tenant_manager)
|
||||
.await?;
|
||||
if handle.0.shut_down.load(Ordering::Relaxed) {
|
||||
let removed = self
|
||||
.map
|
||||
.remove(&handle.0.timeline.shard_timeline_id())
|
||||
.expect("invariant of get_impl is that the returned handle is in the map");
|
||||
assert!(
|
||||
Weak::ptr_eq(&removed, &Arc::downgrade(&handle.0)),
|
||||
"shard_timeline_id() incorrect?"
|
||||
);
|
||||
} else {
|
||||
return Ok(handle);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[instrument(level = "trace", skip_all)]
|
||||
async fn get_impl(
|
||||
&mut self,
|
||||
timeline_id: TimelineId,
|
||||
shard_selector: ShardSelector,
|
||||
tenant_manager: &T::TenantManager,
|
||||
) -> Result<Handle<T>, GetError<T>> {
|
||||
let miss: ShardSelector = {
|
||||
let routing_state = self.shard_routing(timeline_id, shard_selector);
|
||||
match routing_state {
|
||||
RoutingResult::FastPath(handle) => return Ok(handle),
|
||||
RoutingResult::SlowPath(key) => match self.map.get(&key) {
|
||||
Some(cached) => match cached.upgrade() {
|
||||
Ok(upgraded) => return Ok(upgraded),
|
||||
Err(HandleUpgradeError::ShutDown) => {
|
||||
// TODO: dedup with shard_routing()
|
||||
Some(upgraded) => return Ok(Handle(upgraded)),
|
||||
None => {
|
||||
trace!("handle cache stale");
|
||||
self.map.remove(&key).unwrap();
|
||||
continue;
|
||||
ShardSelector::Known(key.shard_index)
|
||||
}
|
||||
},
|
||||
None => break ShardSelector::Known(key.shard_index),
|
||||
None => ShardSelector::Known(key.shard_index),
|
||||
},
|
||||
RoutingResult::NeedConsultTenantManager => break shard_selector,
|
||||
RoutingResult::NeedConsultTenantManager => shard_selector,
|
||||
}
|
||||
};
|
||||
self.get_miss(timeline_id, miss, tenant_manager).await
|
||||
@@ -383,7 +302,7 @@ impl<T: Types> Cache<T> {
|
||||
let Some((first_key, first_handle)) = self.map.iter().next() else {
|
||||
return RoutingResult::NeedConsultTenantManager;
|
||||
};
|
||||
let Ok(first_handle) = first_handle.upgrade() else {
|
||||
let Some(first_handle) = first_handle.upgrade() else {
|
||||
// TODO: dedup with get()
|
||||
trace!("handle cache stale");
|
||||
let first_key_owned = *first_key;
|
||||
@@ -391,7 +310,7 @@ impl<T: Types> Cache<T> {
|
||||
continue;
|
||||
};
|
||||
|
||||
let first_handle_shard_identity = first_handle.get_shard_identity();
|
||||
let first_handle_shard_identity = first_handle.timeline.get_shard_identity();
|
||||
let make_shard_index = |shard_num: ShardNumber| ShardIndex {
|
||||
shard_number: shard_num,
|
||||
shard_count: first_handle_shard_identity.count,
|
||||
@@ -410,11 +329,11 @@ impl<T: Types> Cache<T> {
|
||||
};
|
||||
let first_handle_shard_timeline_id = ShardTimelineId {
|
||||
shard_index: first_handle_shard_identity.shard_index(),
|
||||
timeline_id: first_handle.shard_timeline_id().timeline_id,
|
||||
timeline_id: first_handle.timeline.shard_timeline_id().timeline_id,
|
||||
};
|
||||
|
||||
if need_shard_timeline_id == first_handle_shard_timeline_id {
|
||||
return RoutingResult::FastPath(first_handle);
|
||||
return RoutingResult::FastPath(Handle(first_handle));
|
||||
} else {
|
||||
return RoutingResult::SlowPath(need_shard_timeline_id);
|
||||
}
|
||||
@@ -438,30 +357,23 @@ impl<T: Types> Cache<T> {
|
||||
ShardSelector::Known(idx) => assert_eq!(idx, &key.shard_index),
|
||||
}
|
||||
|
||||
trace!("creating new HandleInner");
|
||||
let handle_inner_arc = Arc::new(Mutex::new(HandleInner::KeepingTimelineGateOpen {
|
||||
gate_guard: Arc::new(
|
||||
// this enter() is expensive in production code because
|
||||
// it hits the global Arc<Timeline>::gate refcounts
|
||||
match timeline.gate().enter() {
|
||||
Ok(guard) => guard,
|
||||
Err(_) => {
|
||||
return Err(GetError::TimelineGateClosed);
|
||||
}
|
||||
},
|
||||
),
|
||||
// this clone is expensive in production code because
|
||||
// it hits the global Arc<Timeline>::clone refcounts
|
||||
timeline: Arc::new(timeline.clone()),
|
||||
}));
|
||||
let handle_weak = WeakHandle {
|
||||
inner: Arc::downgrade(&handle_inner_arc),
|
||||
let gate_guard = match timeline.gate().enter() {
|
||||
Ok(guard) => guard,
|
||||
Err(_) => {
|
||||
return Err(GetError::TimelineGateClosed);
|
||||
}
|
||||
};
|
||||
let handle = handle_weak
|
||||
.upgrade()
|
||||
.ok()
|
||||
.expect("we just created it and it's not linked anywhere yet");
|
||||
{
|
||||
trace!("creating new HandleInner");
|
||||
let handle = Arc::new(
|
||||
// TODO: global metric that keeps track of the number of live HandlerTimeline instances
|
||||
// so we can identify reference cycle bugs.
|
||||
HandleInner {
|
||||
shut_down: AtomicBool::new(false),
|
||||
_gate_guard: gate_guard,
|
||||
timeline: timeline.clone(),
|
||||
},
|
||||
);
|
||||
let handle = {
|
||||
let mut lock_guard = timeline
|
||||
.per_timeline_state()
|
||||
.handles
|
||||
@@ -469,8 +381,7 @@ impl<T: Types> Cache<T> {
|
||||
.expect("mutex poisoned");
|
||||
match &mut *lock_guard {
|
||||
Some(per_timeline_state) => {
|
||||
let replaced =
|
||||
per_timeline_state.insert(self.id, Arc::clone(&handle_inner_arc));
|
||||
let replaced = per_timeline_state.insert(self.id, Arc::clone(&handle));
|
||||
assert!(replaced.is_none(), "some earlier code left a stale handle");
|
||||
match self.map.entry(key) {
|
||||
hash_map::Entry::Occupied(_o) => {
|
||||
@@ -481,7 +392,8 @@ impl<T: Types> Cache<T> {
|
||||
unreachable!()
|
||||
}
|
||||
hash_map::Entry::Vacant(v) => {
|
||||
v.insert(handle_weak);
|
||||
v.insert(Arc::downgrade(&handle));
|
||||
handle
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -489,62 +401,14 @@ impl<T: Types> Cache<T> {
|
||||
return Err(GetError::PerTimelineStateShutDown);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(handle)
|
||||
};
|
||||
Ok(Handle(handle))
|
||||
}
|
||||
Err(e) => Err(GetError::TenantManager(e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) enum HandleUpgradeError {
|
||||
ShutDown,
|
||||
}
|
||||
|
||||
impl<T: Types> WeakHandle<T> {
|
||||
pub(crate) fn upgrade(&self) -> Result<Handle<T>, HandleUpgradeError> {
|
||||
let Some(inner) = Weak::upgrade(&self.inner) else {
|
||||
return Err(HandleUpgradeError::ShutDown);
|
||||
};
|
||||
let lock_guard = inner.lock().expect("poisoned");
|
||||
match &*lock_guard {
|
||||
HandleInner::KeepingTimelineGateOpen {
|
||||
timeline,
|
||||
gate_guard,
|
||||
} => {
|
||||
let gate_guard = Arc::clone(gate_guard);
|
||||
let timeline = Arc::clone(timeline);
|
||||
drop(lock_guard);
|
||||
Ok(Handle {
|
||||
timeline,
|
||||
gate_guard,
|
||||
inner,
|
||||
})
|
||||
}
|
||||
HandleInner::ShutDown => Err(HandleUpgradeError::ShutDown),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn is_same_handle_as(&self, other: &WeakHandle<T>) -> bool {
|
||||
Weak::ptr_eq(&self.inner, &other.inner)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Types> std::ops::Deref for Handle<T> {
|
||||
type Target = T::Timeline;
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.timeline
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Types> Handle<T> {
|
||||
pub(crate) fn downgrade(&self) -> WeakHandle<T> {
|
||||
WeakHandle {
|
||||
inner: Arc::downgrade(&self.inner),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Types> PerTimelineState<T> {
|
||||
/// After this method returns, [`Cache::get`] will never again return a [`Handle`]
|
||||
/// to the [`Types::Timeline`] that embeds this per-timeline state.
|
||||
@@ -566,62 +430,43 @@ impl<T: Types> PerTimelineState<T> {
|
||||
trace!("already shut down");
|
||||
return;
|
||||
};
|
||||
for handle_inner_arc in handles.values() {
|
||||
for handle in handles.values() {
|
||||
// Make hits fail.
|
||||
let mut lock_guard = handle_inner_arc.lock().expect("poisoned");
|
||||
lock_guard.shutdown();
|
||||
handle.shut_down.store(true, Ordering::Relaxed);
|
||||
}
|
||||
drop(handles);
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Types> std::ops::Deref for Handle<T> {
|
||||
type Target = T::Timeline;
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0.timeline
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
impl<T: Types> Drop for HandleInner<T> {
|
||||
fn drop(&mut self) {
|
||||
trace!("HandleInner dropped");
|
||||
}
|
||||
}
|
||||
|
||||
// When dropping a [`Cache`], prune its handles in the [`PerTimelineState`] to break the reference cycle.
|
||||
impl<T: Types> Drop for Cache<T> {
|
||||
fn drop(&mut self) {
|
||||
for (
|
||||
_,
|
||||
WeakHandle {
|
||||
inner: handle_inner_weak,
|
||||
},
|
||||
) in self.map.drain()
|
||||
{
|
||||
let Some(handle_inner_arc) = handle_inner_weak.upgrade() else {
|
||||
continue;
|
||||
};
|
||||
let Some(handle_timeline) = handle_inner_arc
|
||||
// locking rules: drop lock before acquiring other lock below
|
||||
.lock()
|
||||
.expect("poisoned")
|
||||
.shutdown()
|
||||
else {
|
||||
// Concurrent PerTimelineState::shutdown.
|
||||
continue;
|
||||
};
|
||||
// Clean up per_timeline_state so the HandleInner allocation can be dropped.
|
||||
let per_timeline_state = handle_timeline.per_timeline_state();
|
||||
let mut handles_lock_guard = per_timeline_state.handles.lock().expect("mutex poisoned");
|
||||
let Some(handles) = &mut *handles_lock_guard else {
|
||||
continue;
|
||||
};
|
||||
let Some(removed_handle_inner_arc) = handles.remove(&self.id) else {
|
||||
// Concurrent PerTimelineState::shutdown.
|
||||
continue;
|
||||
};
|
||||
drop(handles_lock_guard); // locking rules!
|
||||
assert!(Arc::ptr_eq(&removed_handle_inner_arc, &handle_inner_arc));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Types> HandleInner<T> {
|
||||
fn shutdown(&mut self) -> Option<Arc<T::Timeline>> {
|
||||
match std::mem::replace(self, HandleInner::ShutDown) {
|
||||
HandleInner::KeepingTimelineGateOpen { timeline, .. } => Some(timeline),
|
||||
HandleInner::ShutDown => {
|
||||
// Duplicate shutdowns are possible because both Cache::drop and PerTimelineState::shutdown
|
||||
// may do it concurrently, but locking rules disallow holding per-timeline-state lock and
|
||||
// the handle lock at the same time.
|
||||
None
|
||||
for (_, weak) in self.map.drain() {
|
||||
if let Some(strong) = weak.upgrade() {
|
||||
// handle is still being kept alive in PerTimelineState
|
||||
let timeline = strong.timeline.per_timeline_state();
|
||||
let mut handles = timeline.handles.lock().expect("mutex poisoned");
|
||||
if let Some(handles) = &mut *handles {
|
||||
let Some(removed) = handles.remove(&self.id) else {
|
||||
// There could have been a shutdown inbetween us upgrading the weak and locking the mutex.
|
||||
continue;
|
||||
};
|
||||
assert!(Arc::ptr_eq(&removed, &strong));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -629,8 +474,6 @@ impl<T: Types> HandleInner<T> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Weak;
|
||||
|
||||
use pageserver_api::{
|
||||
key::{rel_block_to_key, Key, DBDIR_KEY},
|
||||
models::ShardParameters,
|
||||
@@ -740,13 +583,39 @@ mod tests {
|
||||
//
|
||||
// fill the cache
|
||||
//
|
||||
assert_eq!(
|
||||
(Arc::strong_count(&shard0), Arc::weak_count(&shard0)),
|
||||
(2, 1),
|
||||
"strong: shard0, mgr; weak: myself"
|
||||
);
|
||||
|
||||
let handle: Handle<_> = cache
|
||||
.get(timeline_id, ShardSelector::Page(key), &mgr)
|
||||
.await
|
||||
.expect("we have the timeline");
|
||||
let handle_inner_weak = Arc::downgrade(&handle.0);
|
||||
assert!(Weak::ptr_eq(&handle.myself, &shard0.myself));
|
||||
assert_eq!(
|
||||
(
|
||||
Weak::strong_count(&handle_inner_weak),
|
||||
Weak::weak_count(&handle_inner_weak)
|
||||
),
|
||||
(2, 2),
|
||||
"strong: handle, per_timeline_state, weak: handle_inner_weak, cache"
|
||||
);
|
||||
assert_eq!(cache.map.len(), 1);
|
||||
|
||||
assert_eq!(
|
||||
(Arc::strong_count(&shard0), Arc::weak_count(&shard0)),
|
||||
(3, 1),
|
||||
"strong: handleinner(per_timeline_state), shard0, mgr; weak: myself"
|
||||
);
|
||||
drop(handle);
|
||||
assert_eq!(
|
||||
(Arc::strong_count(&shard0), Arc::weak_count(&shard0)),
|
||||
(3, 1),
|
||||
"strong: handleinner(per_timeline_state), shard0, mgr; weak: myself"
|
||||
);
|
||||
|
||||
//
|
||||
// demonstrate that Handle holds up gate closure
|
||||
@@ -771,11 +640,21 @@ mod tests {
|
||||
// SHUTDOWN
|
||||
shard0.per_timeline_state.shutdown(); // keeping handle alive across shutdown
|
||||
|
||||
assert_eq!(
|
||||
1,
|
||||
Weak::strong_count(&handle_inner_weak),
|
||||
"through local var handle"
|
||||
);
|
||||
assert_eq!(
|
||||
cache.map.len(),
|
||||
1,
|
||||
"this is an implementation detail but worth pointing out: we can't clear the cache from shutdown(), it's cleared on first access after"
|
||||
);
|
||||
assert_eq!(
|
||||
(Arc::strong_count(&shard0), Arc::weak_count(&shard0)),
|
||||
(3, 1),
|
||||
"strong: handleinner(via handle), shard0, mgr; weak: myself"
|
||||
);
|
||||
|
||||
// this handle is perfectly usable
|
||||
handle.getpage();
|
||||
@@ -799,6 +678,16 @@ mod tests {
|
||||
}
|
||||
|
||||
drop(handle);
|
||||
assert_eq!(
|
||||
0,
|
||||
Weak::strong_count(&handle_inner_weak),
|
||||
"the HandleInner destructor already ran"
|
||||
);
|
||||
assert_eq!(
|
||||
(Arc::strong_count(&shard0), Arc::weak_count(&shard0)),
|
||||
(2, 1),
|
||||
"strong: shard0, mgr; weak: myself"
|
||||
);
|
||||
|
||||
// closing gate succeeds after dropping handle
|
||||
tokio::select! {
|
||||
@@ -817,8 +706,10 @@ mod tests {
|
||||
assert_eq!(cache.map.len(), 0);
|
||||
|
||||
// ensure all refs to shard0 are gone and we're not leaking anything
|
||||
let myself = Weak::clone(&shard0.myself);
|
||||
drop(shard0);
|
||||
drop(mgr);
|
||||
assert_eq!(Weak::strong_count(&myself), 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -1057,11 +948,15 @@ mod tests {
|
||||
handle
|
||||
};
|
||||
handle.getpage();
|
||||
used_handles.push(Arc::downgrade(&handle.timeline));
|
||||
used_handles.push(Arc::downgrade(&handle.0));
|
||||
}
|
||||
|
||||
// No handles exist, thus gates are closed and don't require shutdown.
|
||||
// Thus the gate should close immediately, even without shutdown.
|
||||
// No handles exist, thus gates are closed and don't require shutdown
|
||||
assert!(used_handles
|
||||
.iter()
|
||||
.all(|weak| Weak::strong_count(weak) == 0));
|
||||
|
||||
// ... thus the gate should close immediately, even without shutdown
|
||||
tokio::select! {
|
||||
_ = shard0.gate.close() => { }
|
||||
_ = tokio::time::sleep(FOREVER) => {
|
||||
@@ -1069,172 +964,4 @@ mod tests {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(start_paused = true)]
|
||||
async fn test_weak_handles() {
|
||||
crate::tenant::harness::setup_logging();
|
||||
let timeline_id = TimelineId::generate();
|
||||
let shard0 = Arc::new_cyclic(|myself| StubTimeline {
|
||||
gate: Default::default(),
|
||||
id: timeline_id,
|
||||
shard: ShardIdentity::unsharded(),
|
||||
per_timeline_state: PerTimelineState::default(),
|
||||
myself: myself.clone(),
|
||||
});
|
||||
let mgr = StubManager {
|
||||
shards: vec![shard0.clone()],
|
||||
};
|
||||
|
||||
let refcount_start = Arc::strong_count(&shard0);
|
||||
|
||||
let key = DBDIR_KEY;
|
||||
|
||||
let mut cache = Cache::<TestTypes>::default();
|
||||
|
||||
let handle = cache
|
||||
.get(timeline_id, ShardSelector::Page(key), &mgr)
|
||||
.await
|
||||
.expect("we have the timeline");
|
||||
assert!(Weak::ptr_eq(&handle.myself, &shard0.myself));
|
||||
|
||||
let weak_handle = handle.downgrade();
|
||||
|
||||
drop(handle);
|
||||
|
||||
let upgraded_handle = weak_handle.upgrade().ok().expect("we can upgrade it");
|
||||
|
||||
// Start shutdown
|
||||
shard0.per_timeline_state.shutdown();
|
||||
|
||||
// Upgrades during shutdown don't work, even if upgraded_handle exists.
|
||||
weak_handle
|
||||
.upgrade()
|
||||
.err()
|
||||
.expect("can't upgrade weak handle as soon as shutdown started");
|
||||
|
||||
// But upgraded_handle is still alive, so the gate won't close.
|
||||
tokio::select! {
|
||||
_ = shard0.gate.close() => {
|
||||
panic!("handle is keeping gate open");
|
||||
}
|
||||
_ = tokio::time::sleep(FOREVER) => { }
|
||||
}
|
||||
|
||||
// Drop the last handle.
|
||||
drop(upgraded_handle);
|
||||
|
||||
// The gate should close now, despite there still being a weak_handle.
|
||||
tokio::select! {
|
||||
_ = shard0.gate.close() => { }
|
||||
_ = tokio::time::sleep(FOREVER) => {
|
||||
panic!("only strong handle is dropped and we shut down per-timeline-state")
|
||||
}
|
||||
}
|
||||
|
||||
// The weak handle still can't be upgraded.
|
||||
weak_handle
|
||||
.upgrade()
|
||||
.err()
|
||||
.expect("still shouldn't be able to upgrade the weak handle");
|
||||
|
||||
// There should be no strong references to the timeline object except the one on "stack".
|
||||
assert_eq!(Arc::strong_count(&shard0), refcount_start);
|
||||
}
|
||||
|
||||
#[tokio::test(start_paused = true)]
|
||||
async fn test_reference_cycle_broken_when_cache_is_dropped() {
|
||||
crate::tenant::harness::setup_logging();
|
||||
let timeline_id = TimelineId::generate();
|
||||
let shard0 = Arc::new_cyclic(|myself| StubTimeline {
|
||||
gate: Default::default(),
|
||||
id: timeline_id,
|
||||
shard: ShardIdentity::unsharded(),
|
||||
per_timeline_state: PerTimelineState::default(),
|
||||
myself: myself.clone(),
|
||||
});
|
||||
let mgr = StubManager {
|
||||
shards: vec![shard0.clone()],
|
||||
};
|
||||
let key = DBDIR_KEY;
|
||||
|
||||
let mut cache = Cache::<TestTypes>::default();
|
||||
|
||||
// helper to check if a handle is referenced by per_timeline_state
|
||||
let per_timeline_state_refs_handle = |handle_weak: &Weak<Mutex<HandleInner<_>>>| {
|
||||
let per_timeline_state = shard0.per_timeline_state.handles.lock().unwrap();
|
||||
let per_timeline_state = per_timeline_state.as_ref().unwrap();
|
||||
per_timeline_state
|
||||
.values()
|
||||
.any(|v| Weak::ptr_eq(&Arc::downgrade(v), handle_weak))
|
||||
};
|
||||
|
||||
// Fill the cache.
|
||||
let handle = cache
|
||||
.get(timeline_id, ShardSelector::Page(key), &mgr)
|
||||
.await
|
||||
.expect("we have the timeline");
|
||||
assert!(Weak::ptr_eq(&handle.myself, &shard0.myself));
|
||||
let handle_inner_weak = Arc::downgrade(&handle.inner);
|
||||
assert!(
|
||||
per_timeline_state_refs_handle(&handle_inner_weak),
|
||||
"we still hold `handle` _and_ haven't dropped `cache` yet"
|
||||
);
|
||||
|
||||
// Drop the cache.
|
||||
drop(cache);
|
||||
|
||||
assert!(
|
||||
!(per_timeline_state_refs_handle(&handle_inner_weak)),
|
||||
"nothing should reference the handle allocation anymore"
|
||||
);
|
||||
assert!(
|
||||
Weak::upgrade(&handle_inner_weak).is_some(),
|
||||
"the local `handle` still keeps the allocation alive"
|
||||
);
|
||||
// but obviously the cache is gone so no new allocations can be handed out.
|
||||
|
||||
// Drop handle.
|
||||
drop(handle);
|
||||
assert!(
|
||||
Weak::upgrade(&handle_inner_weak).is_none(),
|
||||
"the local `handle` is dropped, so the allocation should be dropped by now"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(start_paused = true)]
|
||||
async fn test_reference_cycle_broken_when_per_timeline_state_shutdown() {
|
||||
crate::tenant::harness::setup_logging();
|
||||
let timeline_id = TimelineId::generate();
|
||||
let shard0 = Arc::new_cyclic(|myself| StubTimeline {
|
||||
gate: Default::default(),
|
||||
id: timeline_id,
|
||||
shard: ShardIdentity::unsharded(),
|
||||
per_timeline_state: PerTimelineState::default(),
|
||||
myself: myself.clone(),
|
||||
});
|
||||
let mgr = StubManager {
|
||||
shards: vec![shard0.clone()],
|
||||
};
|
||||
let key = DBDIR_KEY;
|
||||
|
||||
let mut cache = Cache::<TestTypes>::default();
|
||||
let handle = cache
|
||||
.get(timeline_id, ShardSelector::Page(key), &mgr)
|
||||
.await
|
||||
.expect("we have the timeline");
|
||||
// grab a weak reference to the inner so can later try to Weak::upgrade it and assert that fails
|
||||
let handle_inner_weak = Arc::downgrade(&handle.inner);
|
||||
|
||||
// drop the handle, obviously the lifetime of `inner` is at least as long as each strong reference to it
|
||||
drop(handle);
|
||||
assert!(Weak::upgrade(&handle_inner_weak).is_some(), "can still");
|
||||
|
||||
// Shutdown the per_timeline_state.
|
||||
shard0.per_timeline_state.shutdown();
|
||||
assert!(Weak::upgrade(&handle_inner_weak).is_none(), "can no longer");
|
||||
|
||||
// cache only contains Weak's, so, it can outlive the per_timeline_state without
|
||||
// Drop explicitly solely to make this point.
|
||||
drop(cache);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -337,45 +337,16 @@ impl OpenLayerManager {
|
||||
compact_to: &[ResidentLayer],
|
||||
metrics: &TimelineMetrics,
|
||||
) {
|
||||
// gc-compaction could contain layer rewrites. We need to delete the old layers and insert the new ones.
|
||||
|
||||
// Match the old layers with the new layers
|
||||
let mut add_layers = HashMap::new();
|
||||
let mut rewrite_layers = HashMap::new();
|
||||
let mut drop_layers = HashMap::new();
|
||||
for layer in compact_from {
|
||||
drop_layers.insert(layer.layer_desc().key(), layer.clone());
|
||||
}
|
||||
for layer in compact_to {
|
||||
if let Some(old_layer) = drop_layers.remove(&layer.layer_desc().key()) {
|
||||
rewrite_layers.insert(layer.layer_desc().key(), (old_layer.clone(), layer.clone()));
|
||||
} else {
|
||||
add_layers.insert(layer.layer_desc().key(), layer.clone());
|
||||
}
|
||||
}
|
||||
let add_layers = add_layers.values().cloned().collect::<Vec<_>>();
|
||||
let drop_layers = drop_layers.values().cloned().collect::<Vec<_>>();
|
||||
let rewrite_layers = rewrite_layers.values().cloned().collect::<Vec<_>>();
|
||||
|
||||
self.rewrite_layers_inner(&rewrite_layers, &drop_layers, &add_layers, metrics);
|
||||
// We can simply reuse compact l0 logic. Use a different function name to indicate a different type of layer map modification.
|
||||
self.finish_compact_l0(compact_from, compact_to, metrics)
|
||||
}
|
||||
|
||||
/// Called post-compaction when some previous generation image layers were trimmed.
|
||||
pub fn rewrite_layers(
|
||||
pub(crate) fn rewrite_layers(
|
||||
&mut self,
|
||||
rewrite_layers: &[(Layer, ResidentLayer)],
|
||||
drop_layers: &[Layer],
|
||||
metrics: &TimelineMetrics,
|
||||
) {
|
||||
self.rewrite_layers_inner(rewrite_layers, drop_layers, &[], metrics);
|
||||
}
|
||||
|
||||
fn rewrite_layers_inner(
|
||||
&mut self,
|
||||
rewrite_layers: &[(Layer, ResidentLayer)],
|
||||
drop_layers: &[Layer],
|
||||
add_layers: &[ResidentLayer],
|
||||
metrics: &TimelineMetrics,
|
||||
) {
|
||||
let mut updates = self.layer_map.batch_update();
|
||||
for (old_layer, new_layer) in rewrite_layers {
|
||||
@@ -411,10 +382,6 @@ impl OpenLayerManager {
|
||||
for l in drop_layers {
|
||||
Self::delete_historic_layer(l, &mut updates, &mut self.layer_fmgr);
|
||||
}
|
||||
for l in add_layers {
|
||||
Self::insert_historic_layer(l.as_ref().clone(), &mut updates, &mut self.layer_fmgr);
|
||||
metrics.record_new_file_metrics(l.layer_desc().file_size);
|
||||
}
|
||||
updates.flush();
|
||||
}
|
||||
|
||||
|
||||
@@ -2,12 +2,11 @@ use std::sync::Arc;
|
||||
|
||||
use pageserver_api::models::{TenantState, TimelineState};
|
||||
|
||||
use super::delete::{delete_local_timeline_directory, DeletionGuard};
|
||||
use super::delete::{delete_local_timeline_directory, DeleteTimelineFlow, DeletionGuard};
|
||||
use super::Timeline;
|
||||
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
|
||||
use crate::tenant::remote_timeline_client::ShutdownIfArchivedError;
|
||||
use crate::tenant::timeline::delete::{make_timeline_delete_guard, TimelineDeleteGuardKind};
|
||||
use crate::tenant::{OffloadedTimeline, Tenant, TenantManifestError, TimelineOrOffloaded};
|
||||
use crate::tenant::{OffloadedTimeline, TenantManifestError, TenantShard, TimelineOrOffloaded};
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub(crate) enum OffloadError {
|
||||
@@ -31,16 +30,19 @@ impl From<TenantManifestError> for OffloadError {
|
||||
}
|
||||
|
||||
pub(crate) async fn offload_timeline(
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
timeline: &Arc<Timeline>,
|
||||
) -> Result<(), OffloadError> {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
tracing::info!("offloading archived timeline");
|
||||
|
||||
let (timeline, guard) = make_timeline_delete_guard(
|
||||
let allow_offloaded_children = true;
|
||||
let set_stopping = false;
|
||||
let (timeline, guard) = DeleteTimelineFlow::prepare(
|
||||
tenant,
|
||||
timeline.timeline_id,
|
||||
TimelineDeleteGuardKind::Offload,
|
||||
allow_offloaded_children,
|
||||
set_stopping,
|
||||
)
|
||||
.map_err(|e| OffloadError::Other(anyhow::anyhow!(e)))?;
|
||||
|
||||
@@ -104,11 +106,11 @@ pub(crate) async fn offload_timeline(
|
||||
}
|
||||
|
||||
/// It is important that this gets called when DeletionGuard is being held.
|
||||
/// For more context see comments in [`make_timeline_delete_guard`]
|
||||
/// For more context see comments in [`DeleteTimelineFlow::prepare`]
|
||||
///
|
||||
/// Returns the strong count of the timeline `Arc`
|
||||
fn remove_timeline_from_tenant(
|
||||
tenant: &Tenant,
|
||||
tenant: &TenantShard,
|
||||
timeline: &Timeline,
|
||||
_: &DeletionGuard, // using it as a witness
|
||||
) -> usize {
|
||||
|
||||
@@ -8,7 +8,7 @@ use utils::{fs_ext, id::TimelineId, lsn::Lsn, sync::gate::GateGuard};
|
||||
use crate::{
|
||||
context::RequestContext,
|
||||
import_datadir,
|
||||
tenant::{CreateTimelineIdempotency, Tenant, TimelineOrOffloaded},
|
||||
tenant::{CreateTimelineIdempotency, TenantShard, TimelineOrOffloaded},
|
||||
};
|
||||
|
||||
use super::Timeline;
|
||||
@@ -16,19 +16,19 @@ use super::Timeline;
|
||||
/// A timeline with some of its files on disk, being initialized.
|
||||
/// This struct ensures the atomicity of the timeline init: it's either properly created and inserted into pageserver's memory, or
|
||||
/// its local files are removed. If we crash while this class exists, then the timeline's local
|
||||
/// state is cleaned up during [`Tenant::clean_up_timelines`], because the timeline's content isn't in remote storage.
|
||||
/// state is cleaned up during [`TenantShard::clean_up_timelines`], because the timeline's content isn't in remote storage.
|
||||
///
|
||||
/// The caller is responsible for proper timeline data filling before the final init.
|
||||
#[must_use]
|
||||
pub struct UninitializedTimeline<'t> {
|
||||
pub(crate) owning_tenant: &'t Tenant,
|
||||
pub(crate) owning_tenant: &'t TenantShard,
|
||||
timeline_id: TimelineId,
|
||||
raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard)>,
|
||||
}
|
||||
|
||||
impl<'t> UninitializedTimeline<'t> {
|
||||
pub(crate) fn new(
|
||||
owning_tenant: &'t Tenant,
|
||||
owning_tenant: &'t TenantShard,
|
||||
timeline_id: TimelineId,
|
||||
raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard)>,
|
||||
) -> Self {
|
||||
@@ -94,7 +94,7 @@ impl<'t> UninitializedTimeline<'t> {
|
||||
/// Prepares timeline data by loading it from the basebackup archive.
|
||||
pub(crate) async fn import_basebackup_from_tar(
|
||||
self,
|
||||
tenant: Arc<Tenant>,
|
||||
tenant: Arc<TenantShard>,
|
||||
copyin_read: &mut (impl tokio::io::AsyncRead + Send + Sync + Unpin),
|
||||
base_lsn: Lsn,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
@@ -163,17 +163,17 @@ pub(crate) fn cleanup_timeline_directory(create_guard: TimelineCreateGuard) {
|
||||
error!("Failed to clean up uninitialized timeline directory {timeline_path:?}: {e:?}")
|
||||
}
|
||||
}
|
||||
// Having cleaned up, we can release this TimelineId in `[Tenant::timelines_creating]` to allow other
|
||||
// Having cleaned up, we can release this TimelineId in `[TenantShard::timelines_creating]` to allow other
|
||||
// timeline creation attempts under this TimelineId to proceed
|
||||
drop(create_guard);
|
||||
}
|
||||
|
||||
/// A guard for timeline creations in process: as long as this object exists, the timeline ID
|
||||
/// is kept in `[Tenant::timelines_creating]` to exclude concurrent attempts to create the same timeline.
|
||||
/// is kept in `[TenantShard::timelines_creating]` to exclude concurrent attempts to create the same timeline.
|
||||
#[must_use]
|
||||
pub(crate) struct TimelineCreateGuard {
|
||||
pub(crate) _tenant_gate_guard: GateGuard,
|
||||
pub(crate) owning_tenant: Arc<Tenant>,
|
||||
pub(crate) owning_tenant: Arc<TenantShard>,
|
||||
pub(crate) timeline_id: TimelineId,
|
||||
pub(crate) timeline_path: Utf8PathBuf,
|
||||
pub(crate) idempotency: CreateTimelineIdempotency,
|
||||
@@ -199,7 +199,7 @@ pub(crate) enum TimelineExclusionError {
|
||||
|
||||
impl TimelineCreateGuard {
|
||||
pub(crate) fn new(
|
||||
owning_tenant: &Arc<Tenant>,
|
||||
owning_tenant: &Arc<TenantShard>,
|
||||
timeline_id: TimelineId,
|
||||
timeline_path: Utf8PathBuf,
|
||||
idempotency: CreateTimelineIdempotency,
|
||||
|
||||
@@ -1117,7 +1117,7 @@ impl ReconnectReason {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::tenant::harness::{TenantHarness, TIMELINE_ID};
|
||||
use crate::tenant::harness::{TenantShardHarness, TIMELINE_ID};
|
||||
use pageserver_api::config::defaults::DEFAULT_WAL_RECEIVER_PROTOCOL;
|
||||
use url::Host;
|
||||
|
||||
@@ -1141,7 +1141,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn no_connection_no_candidate() -> anyhow::Result<()> {
|
||||
let harness = TenantHarness::create("no_connection_no_candidate").await?;
|
||||
let harness = TenantShardHarness::create("no_connection_no_candidate").await?;
|
||||
let mut state = dummy_state(&harness).await;
|
||||
let now = Utc::now().naive_utc();
|
||||
|
||||
@@ -1174,7 +1174,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn connection_no_candidate() -> anyhow::Result<()> {
|
||||
let harness = TenantHarness::create("connection_no_candidate").await?;
|
||||
let harness = TenantShardHarness::create("connection_no_candidate").await?;
|
||||
let mut state = dummy_state(&harness).await;
|
||||
let now = Utc::now().naive_utc();
|
||||
|
||||
@@ -1239,7 +1239,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn no_connection_candidate() -> anyhow::Result<()> {
|
||||
let harness = TenantHarness::create("no_connection_candidate").await?;
|
||||
let harness = TenantShardHarness::create("no_connection_candidate").await?;
|
||||
let mut state = dummy_state(&harness).await;
|
||||
let now = Utc::now().naive_utc();
|
||||
|
||||
@@ -1302,7 +1302,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn candidate_with_many_connection_failures() -> anyhow::Result<()> {
|
||||
let harness = TenantHarness::create("candidate_with_many_connection_failures").await?;
|
||||
let harness = TenantShardHarness::create("candidate_with_many_connection_failures").await?;
|
||||
let mut state = dummy_state(&harness).await;
|
||||
let now = Utc::now().naive_utc();
|
||||
|
||||
@@ -1342,7 +1342,7 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn lsn_wal_over_threshold_current_candidate() -> anyhow::Result<()> {
|
||||
let harness = TenantHarness::create("lsn_wal_over_threshcurrent_candidate").await?;
|
||||
let harness = TenantShardHarness::create("lsn_wal_over_threshcurrent_candidate").await?;
|
||||
let mut state = dummy_state(&harness).await;
|
||||
let current_lsn = Lsn(100_000).align();
|
||||
let now = Utc::now().naive_utc();
|
||||
@@ -1409,7 +1409,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn timeout_connection_threshold_current_candidate() -> anyhow::Result<()> {
|
||||
let harness =
|
||||
TenantHarness::create("timeout_connection_threshold_current_candidate").await?;
|
||||
TenantShardHarness::create("timeout_connection_threshold_current_candidate").await?;
|
||||
let mut state = dummy_state(&harness).await;
|
||||
let current_lsn = Lsn(100_000).align();
|
||||
let now = Utc::now().naive_utc();
|
||||
@@ -1472,7 +1472,8 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn timeout_wal_over_threshold_current_candidate() -> anyhow::Result<()> {
|
||||
let harness = TenantHarness::create("timeout_wal_over_threshold_current_candidate").await?;
|
||||
let harness =
|
||||
TenantShardHarness::create("timeout_wal_over_threshold_current_candidate").await?;
|
||||
let mut state = dummy_state(&harness).await;
|
||||
let current_lsn = Lsn(100_000).align();
|
||||
let new_lsn = Lsn(100_100).align();
|
||||
@@ -1540,7 +1541,7 @@ mod tests {
|
||||
|
||||
const DUMMY_SAFEKEEPER_HOST: &str = "safekeeper_connstr";
|
||||
|
||||
async fn dummy_state(harness: &TenantHarness) -> ConnectionManagerState {
|
||||
async fn dummy_state(harness: &TenantShardHarness) -> ConnectionManagerState {
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
let timeline = tenant
|
||||
.create_test_timeline(TIMELINE_ID, Lsn(0x8), crate::DEFAULT_PG_VERSION, &ctx)
|
||||
@@ -1575,7 +1576,7 @@ mod tests {
|
||||
// and pageserver should prefer to connect to it.
|
||||
let test_az = Some("test_az".to_owned());
|
||||
|
||||
let harness = TenantHarness::create("switch_to_same_availability_zone").await?;
|
||||
let harness = TenantShardHarness::create("switch_to_same_availability_zone").await?;
|
||||
let mut state = dummy_state(&harness).await;
|
||||
state.conf.availability_zone.clone_from(&test_az);
|
||||
let current_lsn = Lsn(100_000).align();
|
||||
|
||||
@@ -140,7 +140,7 @@ pub(super) async fn handle_walreceiver_connection(
|
||||
|
||||
let (replication_client, connection) = {
|
||||
let mut config = wal_source_connconf.to_tokio_postgres_config();
|
||||
config.application_name(format!("pageserver-{}", node.0).as_str());
|
||||
config.application_name("pageserver");
|
||||
config.replication_mode(tokio_postgres::config::ReplicationMode::Physical);
|
||||
match time::timeout(connect_timeout, config.connect(postgres::NoTls)).await {
|
||||
Ok(client_and_conn) => client_and_conn?,
|
||||
|
||||
@@ -563,7 +563,7 @@ impl UploadOp {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::tenant::harness::{TenantHarness, TIMELINE_ID};
|
||||
use crate::tenant::harness::{TenantShardHarness, TIMELINE_ID};
|
||||
use crate::tenant::storage_layer::layer::local_layer_path;
|
||||
use crate::tenant::storage_layer::Layer;
|
||||
use crate::tenant::Timeline;
|
||||
@@ -621,7 +621,7 @@ mod tests {
|
||||
|
||||
runtime
|
||||
.block_on(async {
|
||||
let harness = TenantHarness::create(test_name).await?;
|
||||
let harness = TenantShardHarness::create(test_name).await?;
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
tenant
|
||||
.create_test_timeline(TIMELINE_ID, Lsn(8), DEFAULT_PG_VERSION, &ctx)
|
||||
|
||||
@@ -35,7 +35,6 @@ use crate::virtual_file::{self, VirtualFile};
|
||||
pub struct BlobMeta {
|
||||
pub key: Key,
|
||||
pub lsn: Lsn,
|
||||
pub will_init: bool,
|
||||
}
|
||||
|
||||
/// A view into the vectored blobs read buffer.
|
||||
@@ -311,15 +310,7 @@ pub enum BlobFlag {
|
||||
/// * Iterate over the collected blobs and coalesce them into reads at the end
|
||||
pub struct VectoredReadPlanner {
|
||||
// Track all the blob offsets. Start offsets must be ordered.
|
||||
// Values in the value tuples are:
|
||||
// (
|
||||
// lsn of the blob,
|
||||
// start offset of the blob in the underlying file,
|
||||
// end offset of the blob in the underlying file,
|
||||
// whether the blob initializes the page image or not
|
||||
// see [`pageserver_api::record::NeonWalRecord::will_init`]
|
||||
// )
|
||||
blobs: BTreeMap<Key, Vec<(Lsn, u64, u64, bool)>>,
|
||||
blobs: BTreeMap<Key, Vec<(Lsn, u64, u64)>>,
|
||||
// Arguments for previous blob passed into [`VectoredReadPlanner::handle`]
|
||||
prev: Option<(Key, Lsn, u64, BlobFlag)>,
|
||||
|
||||
@@ -380,12 +371,12 @@ impl VectoredReadPlanner {
|
||||
match flag {
|
||||
BlobFlag::None => {
|
||||
let blobs_for_key = self.blobs.entry(key).or_default();
|
||||
blobs_for_key.push((lsn, start_offset, end_offset, false));
|
||||
blobs_for_key.push((lsn, start_offset, end_offset));
|
||||
}
|
||||
BlobFlag::ReplaceAll => {
|
||||
let blobs_for_key = self.blobs.entry(key).or_default();
|
||||
blobs_for_key.clear();
|
||||
blobs_for_key.push((lsn, start_offset, end_offset, true));
|
||||
blobs_for_key.push((lsn, start_offset, end_offset));
|
||||
}
|
||||
BlobFlag::Ignore => {}
|
||||
}
|
||||
@@ -396,17 +387,11 @@ impl VectoredReadPlanner {
|
||||
let mut reads = Vec::new();
|
||||
|
||||
for (key, blobs_for_key) in self.blobs {
|
||||
for (lsn, start_offset, end_offset, will_init) in blobs_for_key {
|
||||
for (lsn, start_offset, end_offset) in blobs_for_key {
|
||||
let extended = match &mut current_read_builder {
|
||||
Some(read_builder) => read_builder.extend(
|
||||
start_offset,
|
||||
end_offset,
|
||||
BlobMeta {
|
||||
key,
|
||||
lsn,
|
||||
will_init,
|
||||
},
|
||||
),
|
||||
Some(read_builder) => {
|
||||
read_builder.extend(start_offset, end_offset, BlobMeta { key, lsn })
|
||||
}
|
||||
None => VectoredReadExtended::No,
|
||||
};
|
||||
|
||||
@@ -414,11 +399,7 @@ impl VectoredReadPlanner {
|
||||
let next_read_builder = ChunkedVectoredReadBuilder::new(
|
||||
start_offset,
|
||||
end_offset,
|
||||
BlobMeta {
|
||||
key,
|
||||
lsn,
|
||||
will_init,
|
||||
},
|
||||
BlobMeta { key, lsn },
|
||||
self.max_read_size,
|
||||
);
|
||||
|
||||
@@ -546,7 +527,7 @@ impl<'a> VectoredBlobReader<'a> {
|
||||
pub struct StreamingVectoredReadPlanner {
|
||||
read_builder: Option<ChunkedVectoredReadBuilder>,
|
||||
// Arguments for previous blob passed into [`StreamingVectoredReadPlanner::handle`]
|
||||
prev: Option<(Key, Lsn, u64, bool)>,
|
||||
prev: Option<(Key, Lsn, u64)>,
|
||||
/// Max read size per batch. This is not a strict limit. If there are [0, 100) and [100, 200), while the `max_read_size` is 150,
|
||||
/// we will produce a single batch instead of split them.
|
||||
max_read_size: u64,
|
||||
@@ -569,47 +550,27 @@ impl StreamingVectoredReadPlanner {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn handle(
|
||||
&mut self,
|
||||
key: Key,
|
||||
lsn: Lsn,
|
||||
offset: u64,
|
||||
will_init: bool,
|
||||
) -> Option<VectoredRead> {
|
||||
pub fn handle(&mut self, key: Key, lsn: Lsn, offset: u64) -> Option<VectoredRead> {
|
||||
// Implementation note: internally lag behind by one blob such that
|
||||
// we have a start and end offset when initialising [`VectoredRead`]
|
||||
let (prev_key, prev_lsn, prev_offset, prev_will_init) = match self.prev {
|
||||
let (prev_key, prev_lsn, prev_offset) = match self.prev {
|
||||
None => {
|
||||
self.prev = Some((key, lsn, offset, will_init));
|
||||
self.prev = Some((key, lsn, offset));
|
||||
return None;
|
||||
}
|
||||
Some(prev) => prev,
|
||||
};
|
||||
|
||||
let res = self.add_blob(
|
||||
prev_key,
|
||||
prev_lsn,
|
||||
prev_offset,
|
||||
offset,
|
||||
false,
|
||||
prev_will_init,
|
||||
);
|
||||
let res = self.add_blob(prev_key, prev_lsn, prev_offset, offset, false);
|
||||
|
||||
self.prev = Some((key, lsn, offset, will_init));
|
||||
self.prev = Some((key, lsn, offset));
|
||||
|
||||
res
|
||||
}
|
||||
|
||||
pub fn handle_range_end(&mut self, offset: u64) -> Option<VectoredRead> {
|
||||
let res = if let Some((prev_key, prev_lsn, prev_offset, prev_will_init)) = self.prev {
|
||||
self.add_blob(
|
||||
prev_key,
|
||||
prev_lsn,
|
||||
prev_offset,
|
||||
offset,
|
||||
true,
|
||||
prev_will_init,
|
||||
)
|
||||
let res = if let Some((prev_key, prev_lsn, prev_offset)) = self.prev {
|
||||
self.add_blob(prev_key, prev_lsn, prev_offset, offset, true)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
@@ -626,19 +587,10 @@ impl StreamingVectoredReadPlanner {
|
||||
start_offset: u64,
|
||||
end_offset: u64,
|
||||
is_last_blob_in_read: bool,
|
||||
will_init: bool,
|
||||
) -> Option<VectoredRead> {
|
||||
match &mut self.read_builder {
|
||||
Some(read_builder) => {
|
||||
let extended = read_builder.extend(
|
||||
start_offset,
|
||||
end_offset,
|
||||
BlobMeta {
|
||||
key,
|
||||
lsn,
|
||||
will_init,
|
||||
},
|
||||
);
|
||||
let extended = read_builder.extend(start_offset, end_offset, BlobMeta { key, lsn });
|
||||
assert_eq!(extended, VectoredReadExtended::Yes);
|
||||
}
|
||||
None => {
|
||||
@@ -646,11 +598,7 @@ impl StreamingVectoredReadPlanner {
|
||||
Some(ChunkedVectoredReadBuilder::new_streaming(
|
||||
start_offset,
|
||||
end_offset,
|
||||
BlobMeta {
|
||||
key,
|
||||
lsn,
|
||||
will_init,
|
||||
},
|
||||
BlobMeta { key, lsn },
|
||||
))
|
||||
};
|
||||
}
|
||||
@@ -864,7 +812,7 @@ mod tests {
|
||||
let mut planner = StreamingVectoredReadPlanner::new(max_read_size, 1000);
|
||||
let mut reads = Vec::new();
|
||||
for (key, lsn, offset, _) in blob_descriptions.clone() {
|
||||
reads.extend(planner.handle(key, lsn, offset, false));
|
||||
reads.extend(planner.handle(key, lsn, offset));
|
||||
}
|
||||
reads.extend(planner.handle_range_end(652 * 1024));
|
||||
|
||||
@@ -902,7 +850,7 @@ mod tests {
|
||||
let mut planner = StreamingVectoredReadPlanner::new(max_read_size, 2);
|
||||
let mut reads = Vec::new();
|
||||
for (key, lsn, offset, _) in blob_descriptions.clone() {
|
||||
reads.extend(planner.handle(key, lsn, offset, false));
|
||||
reads.extend(planner.handle(key, lsn, offset));
|
||||
}
|
||||
reads.extend(planner.handle_range_end(652 * 1024));
|
||||
|
||||
@@ -927,7 +875,7 @@ mod tests {
|
||||
{
|
||||
let mut planner = StreamingVectoredReadPlanner::new(max_read_size, 1);
|
||||
let mut reads = Vec::new();
|
||||
reads.extend(planner.handle(key, lsn, 0, false));
|
||||
reads.extend(planner.handle(key, lsn, 0));
|
||||
reads.extend(planner.handle_range_end(652 * 1024));
|
||||
assert_eq!(reads.len(), 1);
|
||||
validate_read(&reads[0], &[(key, lsn, 0, BlobFlag::None)]);
|
||||
@@ -935,8 +883,8 @@ mod tests {
|
||||
{
|
||||
let mut planner = StreamingVectoredReadPlanner::new(max_read_size, 1);
|
||||
let mut reads = Vec::new();
|
||||
reads.extend(planner.handle(key, lsn, 0, false));
|
||||
reads.extend(planner.handle(key, lsn, 128 * 1024, false));
|
||||
reads.extend(planner.handle(key, lsn, 0));
|
||||
reads.extend(planner.handle(key, lsn, 128 * 1024));
|
||||
reads.extend(planner.handle_range_end(652 * 1024));
|
||||
assert_eq!(reads.len(), 2);
|
||||
validate_read(&reads[0], &[(key, lsn, 0, BlobFlag::None)]);
|
||||
@@ -945,8 +893,8 @@ mod tests {
|
||||
{
|
||||
let mut planner = StreamingVectoredReadPlanner::new(max_read_size, 2);
|
||||
let mut reads = Vec::new();
|
||||
reads.extend(planner.handle(key, lsn, 0, false));
|
||||
reads.extend(planner.handle(key, lsn, 128 * 1024, false));
|
||||
reads.extend(planner.handle(key, lsn, 0));
|
||||
reads.extend(planner.handle(key, lsn, 128 * 1024));
|
||||
reads.extend(planner.handle_range_end(652 * 1024));
|
||||
assert_eq!(reads.len(), 1);
|
||||
validate_read(
|
||||
@@ -975,7 +923,6 @@ mod tests {
|
||||
let meta = BlobMeta {
|
||||
key: Key::MIN,
|
||||
lsn: Lsn(0),
|
||||
will_init: false,
|
||||
};
|
||||
|
||||
for (idx, (blob, offset)) in blobs.iter().zip(offsets.iter()).enumerate() {
|
||||
|
||||
@@ -9,12 +9,14 @@ use utils::serde_percent::Percent;
|
||||
|
||||
use pageserver_api::models::PageserverUtilization;
|
||||
|
||||
use crate::{config::PageServerConf, metrics::NODE_UTILIZATION_SCORE, tenant::mgr::TenantManager};
|
||||
use crate::{
|
||||
config::PageServerConf, metrics::NODE_UTILIZATION_SCORE, tenant::mgr::TenantShardManager,
|
||||
};
|
||||
|
||||
pub(crate) fn regenerate(
|
||||
conf: &PageServerConf,
|
||||
tenants_path: &Path,
|
||||
tenant_manager: &TenantManager,
|
||||
tenant_manager: &TenantShardManager,
|
||||
) -> anyhow::Result<PageserverUtilization> {
|
||||
let statvfs = nix::sys::statvfs::statvfs(tenants_path)
|
||||
.map_err(std::io::Error::from)
|
||||
|
||||
@@ -499,13 +499,7 @@ impl WalIngest {
|
||||
|
||||
let content = modification
|
||||
.tline
|
||||
.get_rel_page_at_lsn(
|
||||
src_rel,
|
||||
blknum,
|
||||
Version::Modified(modification),
|
||||
ctx,
|
||||
crate::tenant::storage_layer::IoConcurrency::sequential(),
|
||||
)
|
||||
.get_rel_page_at_lsn(src_rel, blknum, Version::Modified(modification), ctx)
|
||||
.await?;
|
||||
modification.put_rel_page_image(dst_rel, blknum, content)?;
|
||||
num_blocks_copied += 1;
|
||||
@@ -1495,7 +1489,6 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::tenant::harness::*;
|
||||
use crate::tenant::remote_timeline_client::{remote_initdb_archive_path, INITDB_PATH};
|
||||
use crate::tenant::storage_layer::IoConcurrency;
|
||||
use postgres_ffi::RELSEG_SIZE;
|
||||
|
||||
use crate::DEFAULT_PG_VERSION;
|
||||
@@ -1538,8 +1531,10 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_relsize() -> Result<()> {
|
||||
let (tenant, ctx) = TenantHarness::create("test_relsize").await?.load().await;
|
||||
let io_concurrency = IoConcurrency::spawn_for_test();
|
||||
let (tenant, ctx) = TenantShardHarness::create("test_relsize")
|
||||
.await?
|
||||
.load()
|
||||
.await;
|
||||
let tline = tenant
|
||||
.create_test_timeline(TIMELINE_ID, Lsn(8), DEFAULT_PG_VERSION, &ctx)
|
||||
.await?;
|
||||
@@ -1607,13 +1602,7 @@ mod tests {
|
||||
// Check page contents at each LSN
|
||||
assert_eq!(
|
||||
tline
|
||||
.get_rel_page_at_lsn(
|
||||
TESTREL_A,
|
||||
0,
|
||||
Version::Lsn(Lsn(0x20)),
|
||||
&ctx,
|
||||
io_concurrency.clone()
|
||||
)
|
||||
.get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x20)), &ctx)
|
||||
.instrument(test_span.clone())
|
||||
.await?,
|
||||
test_img("foo blk 0 at 2")
|
||||
@@ -1621,13 +1610,7 @@ mod tests {
|
||||
|
||||
assert_eq!(
|
||||
tline
|
||||
.get_rel_page_at_lsn(
|
||||
TESTREL_A,
|
||||
0,
|
||||
Version::Lsn(Lsn(0x30)),
|
||||
&ctx,
|
||||
io_concurrency.clone()
|
||||
)
|
||||
.get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x30)), &ctx)
|
||||
.instrument(test_span.clone())
|
||||
.await?,
|
||||
test_img("foo blk 0 at 3")
|
||||
@@ -1635,26 +1618,14 @@ mod tests {
|
||||
|
||||
assert_eq!(
|
||||
tline
|
||||
.get_rel_page_at_lsn(
|
||||
TESTREL_A,
|
||||
0,
|
||||
Version::Lsn(Lsn(0x40)),
|
||||
&ctx,
|
||||
io_concurrency.clone()
|
||||
)
|
||||
.get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x40)), &ctx)
|
||||
.instrument(test_span.clone())
|
||||
.await?,
|
||||
test_img("foo blk 0 at 3")
|
||||
);
|
||||
assert_eq!(
|
||||
tline
|
||||
.get_rel_page_at_lsn(
|
||||
TESTREL_A,
|
||||
1,
|
||||
Version::Lsn(Lsn(0x40)),
|
||||
&ctx,
|
||||
io_concurrency.clone()
|
||||
)
|
||||
.get_rel_page_at_lsn(TESTREL_A, 1, Version::Lsn(Lsn(0x40)), &ctx)
|
||||
.instrument(test_span.clone())
|
||||
.await?,
|
||||
test_img("foo blk 1 at 4")
|
||||
@@ -1662,39 +1633,21 @@ mod tests {
|
||||
|
||||
assert_eq!(
|
||||
tline
|
||||
.get_rel_page_at_lsn(
|
||||
TESTREL_A,
|
||||
0,
|
||||
Version::Lsn(Lsn(0x50)),
|
||||
&ctx,
|
||||
io_concurrency.clone()
|
||||
)
|
||||
.get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x50)), &ctx)
|
||||
.instrument(test_span.clone())
|
||||
.await?,
|
||||
test_img("foo blk 0 at 3")
|
||||
);
|
||||
assert_eq!(
|
||||
tline
|
||||
.get_rel_page_at_lsn(
|
||||
TESTREL_A,
|
||||
1,
|
||||
Version::Lsn(Lsn(0x50)),
|
||||
&ctx,
|
||||
io_concurrency.clone()
|
||||
)
|
||||
.get_rel_page_at_lsn(TESTREL_A, 1, Version::Lsn(Lsn(0x50)), &ctx)
|
||||
.instrument(test_span.clone())
|
||||
.await?,
|
||||
test_img("foo blk 1 at 4")
|
||||
);
|
||||
assert_eq!(
|
||||
tline
|
||||
.get_rel_page_at_lsn(
|
||||
TESTREL_A,
|
||||
2,
|
||||
Version::Lsn(Lsn(0x50)),
|
||||
&ctx,
|
||||
io_concurrency.clone()
|
||||
)
|
||||
.get_rel_page_at_lsn(TESTREL_A, 2, Version::Lsn(Lsn(0x50)), &ctx)
|
||||
.instrument(test_span.clone())
|
||||
.await?,
|
||||
test_img("foo blk 2 at 5")
|
||||
@@ -1717,26 +1670,14 @@ mod tests {
|
||||
);
|
||||
assert_eq!(
|
||||
tline
|
||||
.get_rel_page_at_lsn(
|
||||
TESTREL_A,
|
||||
0,
|
||||
Version::Lsn(Lsn(0x60)),
|
||||
&ctx,
|
||||
io_concurrency.clone()
|
||||
)
|
||||
.get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x60)), &ctx)
|
||||
.instrument(test_span.clone())
|
||||
.await?,
|
||||
test_img("foo blk 0 at 3")
|
||||
);
|
||||
assert_eq!(
|
||||
tline
|
||||
.get_rel_page_at_lsn(
|
||||
TESTREL_A,
|
||||
1,
|
||||
Version::Lsn(Lsn(0x60)),
|
||||
&ctx,
|
||||
io_concurrency.clone()
|
||||
)
|
||||
.get_rel_page_at_lsn(TESTREL_A, 1, Version::Lsn(Lsn(0x60)), &ctx)
|
||||
.instrument(test_span.clone())
|
||||
.await?,
|
||||
test_img("foo blk 1 at 4")
|
||||
@@ -1751,13 +1692,7 @@ mod tests {
|
||||
);
|
||||
assert_eq!(
|
||||
tline
|
||||
.get_rel_page_at_lsn(
|
||||
TESTREL_A,
|
||||
2,
|
||||
Version::Lsn(Lsn(0x50)),
|
||||
&ctx,
|
||||
io_concurrency.clone()
|
||||
)
|
||||
.get_rel_page_at_lsn(TESTREL_A, 2, Version::Lsn(Lsn(0x50)), &ctx)
|
||||
.instrument(test_span.clone())
|
||||
.await?,
|
||||
test_img("foo blk 2 at 5")
|
||||
@@ -1790,26 +1725,14 @@ mod tests {
|
||||
);
|
||||
assert_eq!(
|
||||
tline
|
||||
.get_rel_page_at_lsn(
|
||||
TESTREL_A,
|
||||
0,
|
||||
Version::Lsn(Lsn(0x70)),
|
||||
&ctx,
|
||||
io_concurrency.clone()
|
||||
)
|
||||
.get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x70)), &ctx)
|
||||
.instrument(test_span.clone())
|
||||
.await?,
|
||||
ZERO_PAGE
|
||||
);
|
||||
assert_eq!(
|
||||
tline
|
||||
.get_rel_page_at_lsn(
|
||||
TESTREL_A,
|
||||
1,
|
||||
Version::Lsn(Lsn(0x70)),
|
||||
&ctx,
|
||||
io_concurrency.clone()
|
||||
)
|
||||
.get_rel_page_at_lsn(TESTREL_A, 1, Version::Lsn(Lsn(0x70)), &ctx)
|
||||
.instrument(test_span.clone())
|
||||
.await?,
|
||||
test_img("foo blk 1")
|
||||
@@ -1830,13 +1753,7 @@ mod tests {
|
||||
for blk in 2..1500 {
|
||||
assert_eq!(
|
||||
tline
|
||||
.get_rel_page_at_lsn(
|
||||
TESTREL_A,
|
||||
blk,
|
||||
Version::Lsn(Lsn(0x80)),
|
||||
&ctx,
|
||||
io_concurrency.clone()
|
||||
)
|
||||
.get_rel_page_at_lsn(TESTREL_A, blk, Version::Lsn(Lsn(0x80)), &ctx)
|
||||
.instrument(test_span.clone())
|
||||
.await?,
|
||||
ZERO_PAGE
|
||||
@@ -1844,13 +1761,7 @@ mod tests {
|
||||
}
|
||||
assert_eq!(
|
||||
tline
|
||||
.get_rel_page_at_lsn(
|
||||
TESTREL_A,
|
||||
1500,
|
||||
Version::Lsn(Lsn(0x80)),
|
||||
&ctx,
|
||||
io_concurrency.clone()
|
||||
)
|
||||
.get_rel_page_at_lsn(TESTREL_A, 1500, Version::Lsn(Lsn(0x80)), &ctx)
|
||||
.instrument(test_span.clone())
|
||||
.await?,
|
||||
test_img("foo blk 1500")
|
||||
@@ -1863,7 +1774,7 @@ mod tests {
|
||||
// and then created it again within the same layer.
|
||||
#[tokio::test]
|
||||
async fn test_drop_extend() -> Result<()> {
|
||||
let (tenant, ctx) = TenantHarness::create("test_drop_extend")
|
||||
let (tenant, ctx) = TenantShardHarness::create("test_drop_extend")
|
||||
.await?
|
||||
.load()
|
||||
.await;
|
||||
@@ -1939,11 +1850,10 @@ mod tests {
|
||||
// and then extended it again within the same layer.
|
||||
#[tokio::test]
|
||||
async fn test_truncate_extend() -> Result<()> {
|
||||
let (tenant, ctx) = TenantHarness::create("test_truncate_extend")
|
||||
let (tenant, ctx) = TenantShardHarness::create("test_truncate_extend")
|
||||
.await?
|
||||
.load()
|
||||
.await;
|
||||
let io_concurrency = IoConcurrency::spawn_for_test();
|
||||
let tline = tenant
|
||||
.create_test_timeline(TIMELINE_ID, Lsn(8), DEFAULT_PG_VERSION, &ctx)
|
||||
.await?;
|
||||
@@ -1996,13 +1906,7 @@ mod tests {
|
||||
let data = format!("foo blk {} at {}", blkno, lsn);
|
||||
assert_eq!(
|
||||
tline
|
||||
.get_rel_page_at_lsn(
|
||||
TESTREL_A,
|
||||
blkno,
|
||||
Version::Lsn(lsn),
|
||||
&ctx,
|
||||
io_concurrency.clone()
|
||||
)
|
||||
.get_rel_page_at_lsn(TESTREL_A, blkno, Version::Lsn(lsn), &ctx)
|
||||
.instrument(test_span.clone())
|
||||
.await?,
|
||||
test_img(&data)
|
||||
@@ -2030,13 +1934,7 @@ mod tests {
|
||||
let data = format!("foo blk {} at {}", blkno, lsn);
|
||||
assert_eq!(
|
||||
tline
|
||||
.get_rel_page_at_lsn(
|
||||
TESTREL_A,
|
||||
blkno,
|
||||
Version::Lsn(Lsn(0x60)),
|
||||
&ctx,
|
||||
io_concurrency.clone()
|
||||
)
|
||||
.get_rel_page_at_lsn(TESTREL_A, blkno, Version::Lsn(Lsn(0x60)), &ctx)
|
||||
.instrument(test_span.clone())
|
||||
.await?,
|
||||
test_img(&data)
|
||||
@@ -2055,13 +1953,7 @@ mod tests {
|
||||
let data = format!("foo blk {} at {}", blkno, lsn);
|
||||
assert_eq!(
|
||||
tline
|
||||
.get_rel_page_at_lsn(
|
||||
TESTREL_A,
|
||||
blkno,
|
||||
Version::Lsn(Lsn(0x50)),
|
||||
&ctx,
|
||||
io_concurrency.clone()
|
||||
)
|
||||
.get_rel_page_at_lsn(TESTREL_A, blkno, Version::Lsn(Lsn(0x50)), &ctx)
|
||||
.instrument(test_span.clone())
|
||||
.await?,
|
||||
test_img(&data)
|
||||
@@ -2098,13 +1990,7 @@ mod tests {
|
||||
let data = format!("foo blk {} at {}", blkno, lsn);
|
||||
assert_eq!(
|
||||
tline
|
||||
.get_rel_page_at_lsn(
|
||||
TESTREL_A,
|
||||
blkno,
|
||||
Version::Lsn(Lsn(0x80)),
|
||||
&ctx,
|
||||
io_concurrency.clone()
|
||||
)
|
||||
.get_rel_page_at_lsn(TESTREL_A, blkno, Version::Lsn(Lsn(0x80)), &ctx)
|
||||
.instrument(test_span.clone())
|
||||
.await?,
|
||||
test_img(&data)
|
||||
@@ -2118,7 +2004,10 @@ mod tests {
|
||||
/// split into multiple 1 GB segments in Postgres.
|
||||
#[tokio::test]
|
||||
async fn test_large_rel() -> Result<()> {
|
||||
let (tenant, ctx) = TenantHarness::create("test_large_rel").await?.load().await;
|
||||
let (tenant, ctx) = TenantShardHarness::create("test_large_rel")
|
||||
.await?
|
||||
.load()
|
||||
.await;
|
||||
let tline = tenant
|
||||
.create_test_timeline(TIMELINE_ID, Lsn(8), DEFAULT_PG_VERSION, &ctx)
|
||||
.await?;
|
||||
@@ -2226,7 +2115,9 @@ mod tests {
|
||||
let startpoint = Lsn::from_hex("14AEC08").unwrap();
|
||||
let _endpoint = Lsn::from_hex("1FFFF98").unwrap();
|
||||
|
||||
let harness = TenantHarness::create("test_ingest_real_wal").await.unwrap();
|
||||
let harness = TenantShardHarness::create("test_ingest_real_wal")
|
||||
.await
|
||||
.unwrap();
|
||||
let span = harness
|
||||
.span()
|
||||
.in_scope(|| info_span!("timeline_span", timeline_id=%TIMELINE_ID));
|
||||
|
||||
@@ -911,85 +911,57 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
if (entry->access_count++ == 0)
|
||||
dlist_delete(&entry->list_node);
|
||||
}
|
||||
/*-----------
|
||||
* If the chunk wasn't already in the LFC then we have these
|
||||
* options, in order of preference:
|
||||
*
|
||||
* Unless there is no space available, we can:
|
||||
* 1. Use an entry from the `holes` list, and
|
||||
* 2. Create a new entry.
|
||||
* We can always, regardless of space in the LFC:
|
||||
* 3. evict an entry from LRU, and
|
||||
* 4. ignore the write operation (the least favorite option)
|
||||
*/
|
||||
else if (lfc_ctl->used < lfc_ctl->limit)
|
||||
else
|
||||
{
|
||||
if (!dlist_is_empty(&lfc_ctl->holes))
|
||||
/*
|
||||
* We have two choices if all cache pages are pinned (i.e. used in IO
|
||||
* operations):
|
||||
*
|
||||
* 1) Wait until some of this operation is completed and pages is
|
||||
* unpinned.
|
||||
*
|
||||
* 2) Allocate one more chunk, so that specified cache size is more
|
||||
* recommendation than hard limit.
|
||||
*
|
||||
* As far as probability of such event (that all pages are pinned) is
|
||||
* considered to be very very small: there are should be very large
|
||||
* number of concurrent IO operations and them are limited by
|
||||
* max_connections, we prefer not to complicate code and use second
|
||||
* approach.
|
||||
*/
|
||||
if (lfc_ctl->used >= lfc_ctl->limit && !dlist_is_empty(&lfc_ctl->lru))
|
||||
{
|
||||
/* Cache overflow: evict least recently used chunk */
|
||||
FileCacheEntry *victim = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->lru));
|
||||
|
||||
for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
|
||||
{
|
||||
lfc_ctl->used_pages -= (victim->bitmap[i >> 5] >> (i & 31)) & 1;
|
||||
}
|
||||
CriticalAssert(victim->access_count == 0);
|
||||
entry->offset = victim->offset; /* grab victim's chunk */
|
||||
hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);
|
||||
neon_log(DEBUG2, "Swap file cache page");
|
||||
}
|
||||
else if (!dlist_is_empty(&lfc_ctl->holes))
|
||||
{
|
||||
/* We can reuse a hole that was left behind when the LFC was shrunk previously */
|
||||
FileCacheEntry *hole = dlist_container(FileCacheEntry, list_node,
|
||||
dlist_pop_head_node(&lfc_ctl->holes));
|
||||
uint32 offset = hole->offset;
|
||||
bool hole_found;
|
||||
|
||||
hash_search_with_hash_value(lfc_hash, &hole->key,
|
||||
hole->hash, HASH_REMOVE, &hole_found);
|
||||
FileCacheEntry *hole = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->holes));
|
||||
uint32 offset = hole->offset;
|
||||
bool hole_found;
|
||||
|
||||
hash_search_with_hash_value(lfc_hash, &hole->key, hole->hash, HASH_REMOVE, &hole_found);
|
||||
CriticalAssert(hole_found);
|
||||
|
||||
|
||||
lfc_ctl->used += 1;
|
||||
entry->offset = offset; /* reuse the hole */
|
||||
entry->offset = offset; /* reuse the hole */
|
||||
}
|
||||
else
|
||||
{
|
||||
lfc_ctl->used += 1;
|
||||
entry->offset = lfc_ctl->size++;/* allocate new chunk at end
|
||||
* of file */
|
||||
entry->offset = lfc_ctl->size++; /* allocate new chunk at end
|
||||
* of file */
|
||||
}
|
||||
}
|
||||
/*
|
||||
* We've already used up all allocated LFC entries.
|
||||
*
|
||||
* If we can clear an entry from the LRU, do that.
|
||||
* If we can't (e.g. because all other slots are being accessed)
|
||||
* then we will remove this entry from the hash and continue
|
||||
* on to the next chunk, as we may not exceed the limit.
|
||||
*/
|
||||
else if (!dlist_is_empty(&lfc_ctl->lru))
|
||||
{
|
||||
/* Cache overflow: evict least recently used chunk */
|
||||
FileCacheEntry *victim = dlist_container(FileCacheEntry, list_node,
|
||||
dlist_pop_head_node(&lfc_ctl->lru));
|
||||
|
||||
for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
|
||||
{
|
||||
lfc_ctl->used_pages -= (victim->bitmap[i >> 5] >> (i & 31)) & 1;
|
||||
}
|
||||
|
||||
CriticalAssert(victim->access_count == 0);
|
||||
entry->offset = victim->offset; /* grab victim's chunk */
|
||||
hash_search_with_hash_value(lfc_hash, &victim->key,
|
||||
victim->hash, HASH_REMOVE, NULL);
|
||||
neon_log(DEBUG2, "Swap file cache page");
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Can't add this chunk - we don't have the space for it */
|
||||
hash_search_with_hash_value(lfc_hash, &entry->key, hash,
|
||||
HASH_REMOVE, NULL);
|
||||
|
||||
/*
|
||||
* We can't process this chunk due to lack of space in LFC,
|
||||
* so skip to the next one
|
||||
*/
|
||||
LWLockRelease(lfc_lock);
|
||||
blkno += blocks_in_chunk;
|
||||
buf_offset += blocks_in_chunk;
|
||||
nblocks -= blocks_in_chunk;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!found)
|
||||
{
|
||||
entry->access_count = 1;
|
||||
entry->hash = hash;
|
||||
memset(entry->bitmap, 0, sizeof entry->bitmap);
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
#include "access/xlogrecovery.h"
|
||||
#endif
|
||||
#include "replication/logical.h"
|
||||
#include "replication/logicallauncher.h"
|
||||
#include "replication/slot.h"
|
||||
#include "replication/walsender.h"
|
||||
#include "storage/proc.h"
|
||||
@@ -435,15 +434,6 @@ _PG_init(void)
|
||||
|
||||
restore_running_xacts_callback = RestoreRunningXactsFromClog;
|
||||
|
||||
DefineCustomBoolVariable(
|
||||
"neon.disable_logical_replication_subscribers",
|
||||
"Disables incomming logical replication",
|
||||
NULL,
|
||||
&disable_logical_replication_subscribers,
|
||||
false,
|
||||
PGC_SIGHUP,
|
||||
0,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
DefineCustomBoolVariable(
|
||||
"neon.allow_replica_misconfig",
|
||||
|
||||
@@ -34,8 +34,6 @@ typedef enum
|
||||
T_NeonGetPageRequest,
|
||||
T_NeonDbSizeRequest,
|
||||
T_NeonGetSlruSegmentRequest,
|
||||
/* future tags above this line */
|
||||
T_NeonTestRequest = 99, /* only in cfg(feature = "testing") */
|
||||
|
||||
/* pagestore -> pagestore_client */
|
||||
T_NeonExistsResponse = 100,
|
||||
@@ -44,8 +42,6 @@ typedef enum
|
||||
T_NeonErrorResponse,
|
||||
T_NeonDbSizeResponse,
|
||||
T_NeonGetSlruSegmentResponse,
|
||||
/* future tags above this line */
|
||||
T_NeonTestResponse = 199, /* only in cfg(feature = "testing") */
|
||||
} NeonMessageTag;
|
||||
|
||||
typedef uint64 NeonRequestId;
|
||||
|
||||
190
poetry.lock
generated
190
poetry.lock
generated
@@ -1,4 +1,4 @@
|
||||
# This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand.
|
||||
# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "aiohappyeyeballs"
|
||||
@@ -6,7 +6,6 @@ version = "2.3.5"
|
||||
description = "Happy Eyeballs for asyncio"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "aiohappyeyeballs-2.3.5-py3-none-any.whl", hash = "sha256:4d6dea59215537dbc746e93e779caea8178c866856a721c9c660d7a5a7b8be03"},
|
||||
{file = "aiohappyeyeballs-2.3.5.tar.gz", hash = "sha256:6fa48b9f1317254f122a07a131a86b71ca6946ca989ce6326fff54a99a920105"},
|
||||
@@ -18,7 +17,6 @@ version = "3.10.11"
|
||||
description = "Async http client/server framework (asyncio)"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5077b1a5f40ffa3ba1f40d537d3bec4383988ee51fbba6b74aa8fb1bc466599e"},
|
||||
{file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8d6a14a4d93b5b3c2891fca94fa9d41b2322a68194422bef0dd5ec1e57d7d298"},
|
||||
@@ -130,7 +128,6 @@ version = "1.4.0"
|
||||
description = "Postgres integration with asyncio."
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "aiopg-1.4.0-py3-none-any.whl", hash = "sha256:aea46e8aff30b039cfa818e6db4752c97656e893fc75e5a5dc57355a9e9dedbd"},
|
||||
{file = "aiopg-1.4.0.tar.gz", hash = "sha256:116253bef86b4d954116716d181e9a0294037f266718b2e1c9766af995639d71"},
|
||||
@@ -149,7 +146,6 @@ version = "1.3.1"
|
||||
description = "aiosignal: a list of registered asynchronous callbacks"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"},
|
||||
{file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"},
|
||||
@@ -164,7 +160,6 @@ version = "2.13.2"
|
||||
description = "Allure pytest integration"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "allure-pytest-2.13.2.tar.gz", hash = "sha256:22243159e8ec81ce2b5254b4013802198821b1b42f118f69d4a289396607c7b3"},
|
||||
{file = "allure_pytest-2.13.2-py3-none-any.whl", hash = "sha256:17de9dbee7f61c8e66a5b5e818b00e419dbcea44cb55c24319401ba813220690"},
|
||||
@@ -180,7 +175,6 @@ version = "2.13.2"
|
||||
description = "Common module for integrate allure with python-based frameworks"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "allure-python-commons-2.13.2.tar.gz", hash = "sha256:8a03681330231b1deadd86b97ff68841c6591320114ae638570f1ed60d7a2033"},
|
||||
{file = "allure_python_commons-2.13.2-py3-none-any.whl", hash = "sha256:2bb3646ec3fbf5b36d178a5e735002bc130ae9f9ba80f080af97d368ba375051"},
|
||||
@@ -196,7 +190,6 @@ version = "0.6.0"
|
||||
description = "Reusable constraint types to use with typing.Annotated"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"},
|
||||
{file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"},
|
||||
@@ -208,7 +201,6 @@ version = "4.13.1"
|
||||
description = "ANTLR 4.13.1 runtime for Python 3"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "antlr4-python3-runtime-4.13.1.tar.gz", hash = "sha256:3cd282f5ea7cfb841537fe01f143350fdb1c0b1ce7981443a2fa8513fddb6d1a"},
|
||||
{file = "antlr4_python3_runtime-4.13.1-py3-none-any.whl", hash = "sha256:78ec57aad12c97ac039ca27403ad61cb98aaec8a3f9bb8144f889aa0fa28b943"},
|
||||
@@ -220,7 +212,6 @@ version = "4.3.0"
|
||||
description = "High level compatibility layer for multiple asynchronous event loop implementations"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "anyio-4.3.0-py3-none-any.whl", hash = "sha256:048e05d0f6caeed70d731f3db756d35dcc1f35747c8c403364a8332c630441b8"},
|
||||
{file = "anyio-4.3.0.tar.gz", hash = "sha256:f75253795a87df48568485fd18cdd2a3fa5c4f7c5be8e5e36637733fce06fed6"},
|
||||
@@ -241,7 +232,6 @@ version = "4.0.3"
|
||||
description = "Timeout context manager for asyncio programs"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"},
|
||||
{file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"},
|
||||
@@ -253,7 +243,6 @@ version = "0.30.0"
|
||||
description = "An asyncio PostgreSQL driver"
|
||||
optional = false
|
||||
python-versions = ">=3.8.0"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "asyncpg-0.30.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bfb4dd5ae0699bad2b233672c8fc5ccbd9ad24b89afded02341786887e37927e"},
|
||||
{file = "asyncpg-0.30.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dc1f62c792752a49f88b7e6f774c26077091b44caceb1983509edc18a2222ec0"},
|
||||
@@ -317,7 +306,6 @@ version = "21.4.0"
|
||||
description = "Classes Without Boilerplate"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "attrs-21.4.0-py2.py3-none-any.whl", hash = "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4"},
|
||||
{file = "attrs-21.4.0.tar.gz", hash = "sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"},
|
||||
@@ -335,7 +323,6 @@ version = "1.88.0"
|
||||
description = "AWS SAM Translator is a library that transform SAM templates into AWS CloudFormation templates"
|
||||
optional = false
|
||||
python-versions = "!=4.0,<=4.0,>=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "aws_sam_translator-1.88.0-py3-none-any.whl", hash = "sha256:aa93d498d8de3fb3d485c316155b1628144b823bbc176099a20de06df666fcac"},
|
||||
{file = "aws_sam_translator-1.88.0.tar.gz", hash = "sha256:e77c65f3488566122277accd44a0f1ec018e37403e0d5fe25120d96e537e91a7"},
|
||||
@@ -356,7 +343,6 @@ version = "2.10.0"
|
||||
description = "The AWS X-Ray SDK for Python (the SDK) enables Python developers to record and emit information from within their applications to the AWS X-Ray service."
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "aws-xray-sdk-2.10.0.tar.gz", hash = "sha256:9b14924fd0628cf92936055864655354003f0b1acc3e1c3ffde6403d0799dd7a"},
|
||||
{file = "aws_xray_sdk-2.10.0-py2.py3-none-any.whl", hash = "sha256:7551e81a796e1a5471ebe84844c40e8edf7c218db33506d046fec61f7495eda4"},
|
||||
@@ -372,7 +358,6 @@ version = "2.2.1"
|
||||
description = "Function decoration for backoff and retry"
|
||||
optional = false
|
||||
python-versions = ">=3.7,<4.0"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"},
|
||||
{file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"},
|
||||
@@ -384,7 +369,6 @@ version = "1.34.11"
|
||||
description = "The AWS SDK for Python"
|
||||
optional = false
|
||||
python-versions = ">= 3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "boto3-1.34.11-py3-none-any.whl", hash = "sha256:1af021e0c6e3040e8de66d403e963566476235bb70f9a8e3f6784813ac2d8026"},
|
||||
{file = "boto3-1.34.11.tar.gz", hash = "sha256:31c130a40ec0631059b77d7e87f67ad03ff1685a5b37638ac0c4687026a3259d"},
|
||||
@@ -404,7 +388,6 @@ version = "1.26.16"
|
||||
description = "Type annotations for boto3 1.26.16 generated with mypy-boto3-builder 7.11.11"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "boto3-stubs-1.26.16.tar.gz", hash = "sha256:618253ae19f1480785759bcaee8c8b10ed3fc037027247c26a3461a50f58406d"},
|
||||
{file = "boto3_stubs-1.26.16-py3-none-any.whl", hash = "sha256:8cf2925bc3e1349c93eb0f49c1061affc5ca314d69eeb335349037969d0787ed"},
|
||||
@@ -749,7 +732,6 @@ version = "1.34.11"
|
||||
description = "Low-level, data-driven core of boto 3."
|
||||
optional = false
|
||||
python-versions = ">= 3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "botocore-1.34.11-py3-none-any.whl", hash = "sha256:1ff1398b6ea670e1c01ac67a33af3da854f8e700d3528289c04f319c330d8250"},
|
||||
{file = "botocore-1.34.11.tar.gz", hash = "sha256:51905c3d623c60df5dc5794387de7caf886d350180a01a3dfa762e903edb45a9"},
|
||||
@@ -769,7 +751,6 @@ version = "1.27.38"
|
||||
description = "Type annotations for botocore 1.27.38 generated with mypy-boto3-builder 7.10.1"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "botocore-stubs-1.27.38.tar.gz", hash = "sha256:408e8b86b5d171b58f81c74ca9d3b5317a5a8e2d3bc2073aa841ac13b8939e56"},
|
||||
{file = "botocore_stubs-1.27.38-py3-none-any.whl", hash = "sha256:7add7641e9a479a9c8366893bb522fd9ca3d58714201e43662a200a148a1bc38"},
|
||||
@@ -784,7 +765,6 @@ version = "2024.7.4"
|
||||
description = "Python package for providing Mozilla's CA Bundle."
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"},
|
||||
{file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"},
|
||||
@@ -796,7 +776,6 @@ version = "1.17.1"
|
||||
description = "Foreign Function Interface for Python calling C code."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"},
|
||||
{file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"},
|
||||
@@ -876,7 +855,6 @@ version = "0.87.1"
|
||||
description = "Checks CloudFormation templates for practices and behaviour that could potentially be improved"
|
||||
optional = false
|
||||
python-versions = "!=4.0,<=4.0,>=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "cfn_lint-0.87.1-py3-none-any.whl", hash = "sha256:d450f450635fc223b6f66880ccac52a5fd1a52966fa1705f1ba52b88dfed3071"},
|
||||
{file = "cfn_lint-0.87.1.tar.gz", hash = "sha256:b3ce9d3e5e0eadcea5d584c8ccaa00bf2a990a36a64d7ffd8683bc60b7e4f06f"},
|
||||
@@ -900,7 +878,6 @@ version = "2.1.0"
|
||||
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
|
||||
optional = false
|
||||
python-versions = ">=3.6.0"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "charset-normalizer-2.1.0.tar.gz", hash = "sha256:575e708016ff3a5e3681541cb9d79312c416835686d054a23accb873b254f413"},
|
||||
{file = "charset_normalizer-2.1.0-py3-none-any.whl", hash = "sha256:5189b6f22b01957427f35b6a08d9a0bc45b46d3788ef5a92e978433c7a35f8a5"},
|
||||
@@ -915,7 +892,6 @@ version = "8.1.3"
|
||||
description = "Composable command line interface toolkit"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"},
|
||||
{file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"},
|
||||
@@ -930,7 +906,6 @@ version = "0.7.17"
|
||||
description = "ClickHouse Database Core Driver for Python, Pandas, and Superset"
|
||||
optional = false
|
||||
python-versions = "~=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "clickhouse-connect-0.7.17.tar.gz", hash = "sha256:854f1f9f3e024e7f89ae5d57cd3289d7a4c3dc91a9f24c4d233014f0ea19cb2d"},
|
||||
{file = "clickhouse_connect-0.7.17-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:aca36f5f28be1ada2981fce87724bbf451f267c918015baec59e527de3c9c882"},
|
||||
@@ -1021,8 +996,6 @@ version = "0.4.5"
|
||||
description = "Cross-platform colored terminal text."
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||
groups = ["main"]
|
||||
markers = "sys_platform == \"win32\" or platform_system == \"Windows\""
|
||||
files = [
|
||||
{file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"},
|
||||
{file = "colorama-0.4.5.tar.gz", hash = "sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"},
|
||||
@@ -1034,7 +1007,6 @@ version = "43.0.1"
|
||||
description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "cryptography-43.0.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:8385d98f6a3bf8bb2d65a73e17ed87a3ba84f6991c155691c51112075f9ffc5d"},
|
||||
{file = "cryptography-43.0.1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27e613d7077ac613e399270253259d9d53872aaf657471473ebfc9a52935c062"},
|
||||
@@ -1084,7 +1056,6 @@ version = "7.1.0"
|
||||
description = "A Python library for the Docker Engine API."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0"},
|
||||
{file = "docker-7.1.0.tar.gz", hash = "sha256:ad8c70e6e3f8926cb8a92619b832b4ea5299e2831c14284663184e200546fa6c"},
|
||||
@@ -1107,7 +1078,6 @@ version = "1.9.0"
|
||||
description = "execnet: rapid multi-Python deployment"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "execnet-1.9.0-py2.py3-none-any.whl", hash = "sha256:a295f7cc774947aac58dde7fdc85f4aa00c42adf5d8f5468fc630c1acf30a142"},
|
||||
{file = "execnet-1.9.0.tar.gz", hash = "sha256:8f694f3ba9cc92cab508b152dcfe322153975c29bda272e2fd7f3f00f36e47c5"},
|
||||
@@ -1122,7 +1092,6 @@ version = "2.2.5"
|
||||
description = "A simple framework for building complex web applications."
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "Flask-2.2.5-py3-none-any.whl", hash = "sha256:58107ed83443e86067e41eff4631b058178191a355886f8e479e347fa1285fdf"},
|
||||
{file = "Flask-2.2.5.tar.gz", hash = "sha256:edee9b0a7ff26621bd5a8c10ff484ae28737a2410d99b0bb9a6850c7fb977aa0"},
|
||||
@@ -1144,7 +1113,6 @@ version = "5.0.0"
|
||||
description = "A Flask extension adding a decorator for CORS support"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "Flask_Cors-5.0.0-py2.py3-none-any.whl", hash = "sha256:b9e307d082a9261c100d8fb0ba909eec6a228ed1b60a8315fd85f783d61910bc"},
|
||||
{file = "flask_cors-5.0.0.tar.gz", hash = "sha256:5aadb4b950c4e93745034594d9f3ea6591f734bb3662e16e255ffbf5e89c88ef"},
|
||||
@@ -1159,7 +1127,6 @@ version = "1.5.0"
|
||||
description = "A list-like structure which implements collections.abc.MutableSequence"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5b6a66c18b5b9dd261ca98dffcb826a525334b2f29e7caa54e182255c5f6a65a"},
|
||||
{file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d1b3eb7b05ea246510b43a7e53ed1653e55c2121019a97e60cad7efb881a97bb"},
|
||||
@@ -1261,7 +1228,6 @@ version = "3.2.1"
|
||||
description = "GraphQL implementation for Python, a port of GraphQL.js, the JavaScript reference implementation for GraphQL."
|
||||
optional = false
|
||||
python-versions = ">=3.6,<4"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "graphql-core-3.2.1.tar.gz", hash = "sha256:9d1bf141427b7d54be944587c8349df791ce60ade2e3cccaf9c56368c133c201"},
|
||||
{file = "graphql_core-3.2.1-py3-none-any.whl", hash = "sha256:f83c658e4968998eed1923a2e3e3eddd347e005ac0315fbb7ca4d70ea9156323"},
|
||||
@@ -1273,7 +1239,6 @@ version = "0.14.0"
|
||||
description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
|
||||
{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
|
||||
@@ -1282,33 +1247,27 @@ files = [
|
||||
[[package]]
|
||||
name = "h2"
|
||||
version = "4.1.0"
|
||||
description = "Pure-Python HTTP/2 protocol implementation"
|
||||
description = "HTTP/2 State-Machine based protocol implementation"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
files = []
|
||||
develop = false
|
||||
python-versions = ">=3.6.1"
|
||||
files = [
|
||||
{file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"},
|
||||
{file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
hpack = ">=4.1,<5"
|
||||
hyperframe = ">=6.1,<7"
|
||||
|
||||
[package.source]
|
||||
type = "git"
|
||||
url = "https://github.com/python-hyper/h2"
|
||||
reference = "HEAD"
|
||||
resolved_reference = "0b98b244b5fd1fe96100ac14905417a3b70a4286"
|
||||
hpack = ">=4.0,<5"
|
||||
hyperframe = ">=6.0,<7"
|
||||
|
||||
[[package]]
|
||||
name = "hpack"
|
||||
version = "4.1.0"
|
||||
description = "Pure-Python HPACK header encoding"
|
||||
version = "4.0.0"
|
||||
description = "Pure-Python HPACK header compression"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
python-versions = ">=3.6.1"
|
||||
files = [
|
||||
{file = "hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496"},
|
||||
{file = "hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca"},
|
||||
{file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"},
|
||||
{file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1317,7 +1276,6 @@ version = "1.0.3"
|
||||
description = "A minimal low-level HTTP client."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "httpcore-1.0.3-py3-none-any.whl", hash = "sha256:9a6a501c3099307d9fd76ac244e08503427679b1e81ceb1d922485e2f2462ad2"},
|
||||
{file = "httpcore-1.0.3.tar.gz", hash = "sha256:5c0f9546ad17dac4d0772b0808856eb616eb8b48ce94f49ed819fd6982a8a544"},
|
||||
@@ -1339,7 +1297,6 @@ version = "0.26.0"
|
||||
description = "The next generation HTTP client."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "httpx-0.26.0-py3-none-any.whl", hash = "sha256:8915f5a3627c4d47b73e8202457cb28f1266982d1159bd5779d86a80c0eab1cd"},
|
||||
{file = "httpx-0.26.0.tar.gz", hash = "sha256:451b55c30d5185ea6b23c2c793abf9bb237d2a7dfb901ced6ff69ad37ec1dfaf"},
|
||||
@@ -1361,14 +1318,13 @@ socks = ["socksio (==1.*)"]
|
||||
|
||||
[[package]]
|
||||
name = "hyperframe"
|
||||
version = "6.1.0"
|
||||
description = "Pure-Python HTTP/2 framing"
|
||||
version = "6.0.1"
|
||||
description = "HTTP/2 framing layer for Python"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
python-versions = ">=3.6.1"
|
||||
files = [
|
||||
{file = "hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5"},
|
||||
{file = "hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08"},
|
||||
{file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"},
|
||||
{file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1377,7 +1333,6 @@ version = "3.7"
|
||||
description = "Internationalized Domain Names in Applications (IDNA)"
|
||||
optional = false
|
||||
python-versions = ">=3.5"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"},
|
||||
{file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"},
|
||||
@@ -1389,7 +1344,6 @@ version = "1.1.1"
|
||||
description = "iniconfig: brain-dead simple config-ini parsing"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"},
|
||||
{file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"},
|
||||
@@ -1401,7 +1355,6 @@ version = "2.1.2"
|
||||
description = "Safely pass data to untrusted environments and back."
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "itsdangerous-2.1.2-py3-none-any.whl", hash = "sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44"},
|
||||
{file = "itsdangerous-2.1.2.tar.gz", hash = "sha256:5dbbc68b317e5e42f327f9021763545dc3fc3bfe22e6deb96aaf1fc38874156a"},
|
||||
@@ -1413,7 +1366,6 @@ version = "3.1.5"
|
||||
description = "A very fast and expressive template engine."
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "jinja2-3.1.5-py3-none-any.whl", hash = "sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb"},
|
||||
{file = "jinja2-3.1.5.tar.gz", hash = "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb"},
|
||||
@@ -1431,7 +1383,6 @@ version = "1.0.1"
|
||||
description = "JSON Matching Expressions"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"},
|
||||
{file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"},
|
||||
@@ -1443,7 +1394,6 @@ version = "0.9.0"
|
||||
description = "The ultimate Python library for JOSE RFCs, including JWS, JWE, JWK, JWA, JWT"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "joserfc-0.9.0-py3-none-any.whl", hash = "sha256:4026bdbe2c196cd40574e916fa1e28874d99649412edaab0e373dec3077153fb"},
|
||||
{file = "joserfc-0.9.0.tar.gz", hash = "sha256:eebca7f587b1761ce43a98ffd5327f2b600b9aa5bb0a77b947687f503ad43bc0"},
|
||||
@@ -1461,7 +1411,6 @@ version = "1.2.3"
|
||||
description = "Generate source code for Python classes from a JSON schema."
|
||||
optional = false
|
||||
python-versions = ">= 2.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "jschema_to_python-1.2.3-py3-none-any.whl", hash = "sha256:8a703ca7604d42d74b2815eecf99a33359a8dccbb80806cce386d5e2dd992b05"},
|
||||
{file = "jschema_to_python-1.2.3.tar.gz", hash = "sha256:76ff14fe5d304708ccad1284e4b11f96a658949a31ee7faed9e0995279549b91"},
|
||||
@@ -1478,7 +1427,6 @@ version = "2.0.0"
|
||||
description = "Diff JSON and JSON-like structures in Python"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "jsondiff-2.0.0-py3-none-any.whl", hash = "sha256:689841d66273fc88fc79f7d33f4c074774f4f214b6466e3aff0e5adaf889d1e0"},
|
||||
{file = "jsondiff-2.0.0.tar.gz", hash = "sha256:2795844ef075ec8a2b8d385c4d59f5ea48b08e7180fce3cb2787be0db00b1fb4"},
|
||||
@@ -1490,8 +1438,6 @@ version = "0.20.0"
|
||||
description = "Python bindings for Jsonnet - The data templating language"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
markers = "python_version < \"3.13\""
|
||||
files = [
|
||||
{file = "jsonnet-0.20.0.tar.gz", hash = "sha256:7e770c7bf3a366b97b650a39430450f77612e74406731eb75c5bd59f3f104d4f"},
|
||||
]
|
||||
@@ -1502,7 +1448,6 @@ version = "1.32"
|
||||
description = "Apply JSON-Patches (RFC 6902)"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "jsonpatch-1.32-py2.py3-none-any.whl", hash = "sha256:26ac385719ac9f54df8a2f0827bb8253aa3ea8ab7b3368457bcdb8c14595a397"},
|
||||
{file = "jsonpatch-1.32.tar.gz", hash = "sha256:b6ddfe6c3db30d81a96aaeceb6baf916094ffa23d7dd5fa2c13e13f8b6e600c2"},
|
||||
@@ -1517,7 +1462,6 @@ version = "1.6.1"
|
||||
description = "A final implementation of JSONPath for Python that aims to be standard compliant, including arithmetic and binary comparison operators and providing clear AST for metaprogramming."
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "jsonpath-ng-1.6.1.tar.gz", hash = "sha256:086c37ba4917304850bd837aeab806670224d3f038fe2833ff593a672ef0a5fa"},
|
||||
{file = "jsonpath_ng-1.6.1-py3-none-any.whl", hash = "sha256:8f22cd8273d7772eea9aaa84d922e0841aa36fdb8a2c6b7f6c3791a16a9bc0be"},
|
||||
@@ -1532,7 +1476,6 @@ version = "2.2.0"
|
||||
description = "Python library for serializing any arbitrary object graph into JSON"
|
||||
optional = false
|
||||
python-versions = ">=2.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "jsonpickle-2.2.0-py2.py3-none-any.whl", hash = "sha256:de7f2613818aa4f234138ca11243d6359ff83ae528b2185efdd474f62bcf9ae1"},
|
||||
{file = "jsonpickle-2.2.0.tar.gz", hash = "sha256:7b272918b0554182e53dc340ddd62d9b7f902fec7e7b05620c04f3ccef479a0e"},
|
||||
@@ -1549,7 +1492,6 @@ version = "2.3"
|
||||
description = "Identify specific nodes in a JSON document (RFC 6901)"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "jsonpointer-2.3-py2.py3-none-any.whl", hash = "sha256:51801e558539b4e9cd268638c078c6c5746c9ac96bc38152d443400e4f3793e9"},
|
||||
{file = "jsonpointer-2.3.tar.gz", hash = "sha256:97cba51526c829282218feb99dab1b1e6bdf8efd1c43dc9d57be093c0d69c99a"},
|
||||
@@ -1561,7 +1503,6 @@ version = "4.17.3"
|
||||
description = "An implementation of JSON Schema validation for Python"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "jsonschema-4.17.3-py3-none-any.whl", hash = "sha256:a870ad254da1a8ca84b6a2905cac29d265f805acc57af304784962a2aa6508f6"},
|
||||
{file = "jsonschema-4.17.3.tar.gz", hash = "sha256:0f864437ab8b6076ba6707453ef8f98a6a0d512a80e93f8abdb676f737ecb60d"},
|
||||
@@ -1581,7 +1522,6 @@ version = "0.1.6"
|
||||
description = "JSONSchema Spec with object-oriented paths"
|
||||
optional = false
|
||||
python-versions = ">=3.7.0,<4.0.0"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "jsonschema_spec-0.1.6-py3-none-any.whl", hash = "sha256:f2206d18c89d1824c1f775ba14ed039743b41a9167bd2c5bdb774b66b3ca0bbf"},
|
||||
{file = "jsonschema_spec-0.1.6.tar.gz", hash = "sha256:90215863b56e212086641956b20127ccbf6d8a3a38343dad01d6a74d19482f76"},
|
||||
@@ -1599,7 +1539,6 @@ version = "1.9"
|
||||
description = "Creates JUnit XML test result documents that can be read by tools such as Jenkins"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "junit-xml-1.9.tar.gz", hash = "sha256:de16a051990d4e25a3982b2dd9e89d671067548718866416faec14d9de56db9f"},
|
||||
{file = "junit_xml-1.9-py2.py3-none-any.whl", hash = "sha256:ec5ca1a55aefdd76d28fcc0b135251d156c7106fa979686a4b48d62b761b4732"},
|
||||
@@ -1614,7 +1553,6 @@ version = "1.5.6"
|
||||
description = "Implementation of JOSE Web standards"
|
||||
optional = false
|
||||
python-versions = ">= 3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "jwcrypto-1.5.6-py3-none-any.whl", hash = "sha256:150d2b0ebbdb8f40b77f543fb44ffd2baeff48788be71f67f03566692fd55789"},
|
||||
{file = "jwcrypto-1.5.6.tar.gz", hash = "sha256:771a87762a0c081ae6166958a954f80848820b2ab066937dc8b8379d65b1b039"},
|
||||
@@ -1630,7 +1568,6 @@ version = "2.0.2"
|
||||
description = "Pure Python client for Apache Kafka"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "kafka-python-2.0.2.tar.gz", hash = "sha256:04dfe7fea2b63726cd6f3e79a2d86e709d608d74406638c5da33a01d45a9d7e3"},
|
||||
{file = "kafka_python-2.0.2-py2.py3-none-any.whl", hash = "sha256:2d92418c7cb1c298fa6c7f0fb3519b520d0d7526ac6cb7ae2a4fc65a51a94b6e"},
|
||||
@@ -1645,7 +1582,6 @@ version = "1.10.0"
|
||||
description = "A fast and thorough lazy object proxy."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "lazy-object-proxy-1.10.0.tar.gz", hash = "sha256:78247b6d45f43a52ef35c25b5581459e85117225408a4128a3daf8bf9648ac69"},
|
||||
{file = "lazy_object_proxy-1.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:855e068b0358ab916454464a884779c7ffa312b8925c6f7401e952dcf3b89977"},
|
||||
@@ -1692,7 +1628,6 @@ version = "4.3.3"
|
||||
description = "LZ4 Bindings for Python"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "lz4-4.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b891880c187e96339474af2a3b2bfb11a8e4732ff5034be919aa9029484cd201"},
|
||||
{file = "lz4-4.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:222a7e35137d7539c9c33bb53fcbb26510c5748779364014235afc62b0ec797f"},
|
||||
@@ -1743,7 +1678,6 @@ version = "2.1.1"
|
||||
description = "Safely add untrusted strings to HTML/XML markup."
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812"},
|
||||
{file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a"},
|
||||
@@ -1793,7 +1727,6 @@ version = "5.0.6"
|
||||
description = ""
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "moto-5.0.6-py2.py3-none-any.whl", hash = "sha256:ca1e22831a741733b581ff2ef4d6ae2e1c6db1eab97af1b78b86ca2c6e88c609"},
|
||||
{file = "moto-5.0.6.tar.gz", hash = "sha256:ad8b23f2b555ad694da8b2432a42b6d96beaaf67a4e7d932196a72193a2eee2c"},
|
||||
@@ -1853,7 +1786,6 @@ version = "1.3.0"
|
||||
description = "Python library for arbitrary-precision floating-point arithmetic"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"},
|
||||
{file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"},
|
||||
@@ -1871,7 +1803,6 @@ version = "6.0.5"
|
||||
description = "multidict implementation"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "multidict-6.0.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:228b644ae063c10e7f324ab1ab6b548bdf6f8b47f3ec234fef1093bc2735e5f9"},
|
||||
{file = "multidict-6.0.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:896ebdcf62683551312c30e20614305f53125750803b614e9e6ce74a96232604"},
|
||||
@@ -1971,7 +1902,6 @@ version = "1.13.0"
|
||||
description = "Optional static typing for Python"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["dev"]
|
||||
files = [
|
||||
{file = "mypy-1.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6607e0f1dd1fb7f0aca14d936d13fd19eba5e17e1cd2a14f808fa5f8f6d8f60a"},
|
||||
{file = "mypy-1.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8a21be69bd26fa81b1f80a61ee7ab05b076c674d9b18fb56239d72e21d9f4c80"},
|
||||
@@ -2024,7 +1954,6 @@ version = "1.26.0.post1"
|
||||
description = "Type annotations for boto3.S3 1.26.0 service generated with mypy-boto3-builder 7.11.10"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "mypy-boto3-s3-1.26.0.post1.tar.gz", hash = "sha256:6d7079f8c739dc993cbedad0736299c413b297814b73795a3855a79169ecc938"},
|
||||
{file = "mypy_boto3_s3-1.26.0.post1-py3-none-any.whl", hash = "sha256:7de2792ff0cc541b84cd46ff3a6aa2b6e5f267217f2203f27f6e4016bddc644d"},
|
||||
@@ -2039,7 +1968,6 @@ version = "1.0.0"
|
||||
description = "Type system extensions for programs checked with the mypy type checker."
|
||||
optional = false
|
||||
python-versions = ">=3.5"
|
||||
groups = ["dev"]
|
||||
files = [
|
||||
{file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"},
|
||||
{file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
|
||||
@@ -2051,7 +1979,6 @@ version = "2.8.5"
|
||||
description = "Python package for creating and manipulating graphs and networks"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "networkx-2.8.5-py3-none-any.whl", hash = "sha256:a762f4b385692d9c3a6f2912d058d76d29a827deaedf9e63ed14d397b8030687"},
|
||||
{file = "networkx-2.8.5.tar.gz", hash = "sha256:15a7b81a360791c458c55a417418ea136c13378cfdc06a2dcdc12bd2f9cf09c1"},
|
||||
@@ -2070,7 +1997,6 @@ version = "0.4.4"
|
||||
description = "OpenAPI schema validation for Python"
|
||||
optional = false
|
||||
python-versions = ">=3.7.0,<4.0.0"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "openapi_schema_validator-0.4.4-py3-none-any.whl", hash = "sha256:79f37f38ef9fd5206b924ed7a6f382cea7b649b3b56383c47f1906082b7b9015"},
|
||||
{file = "openapi_schema_validator-0.4.4.tar.gz", hash = "sha256:c573e2be2c783abae56c5a1486ab716ca96e09d1c3eab56020d1dc680aa57bf8"},
|
||||
@@ -2089,7 +2015,6 @@ version = "0.5.7"
|
||||
description = "OpenAPI 2.0 (aka Swagger) and OpenAPI 3 spec validator"
|
||||
optional = false
|
||||
python-versions = ">=3.7.0,<4.0.0"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "openapi_spec_validator-0.5.7-py3-none-any.whl", hash = "sha256:8712d2879db7692974ef89c47a3ebfc79436442921ec3a826ac0ce80cde8c549"},
|
||||
{file = "openapi_spec_validator-0.5.7.tar.gz", hash = "sha256:6c2d42180045a80fd6314de848b94310bdb0fa4949f4b099578b69f79d9fa5ac"},
|
||||
@@ -2107,7 +2032,6 @@ version = "24.2"
|
||||
description = "Core utilities for Python packages"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"},
|
||||
{file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"},
|
||||
@@ -2119,7 +2043,6 @@ version = "0.4.3"
|
||||
description = "Object-oriented paths"
|
||||
optional = false
|
||||
python-versions = ">=3.7.0,<4.0.0"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pathable-0.4.3-py3-none-any.whl", hash = "sha256:cdd7b1f9d7d5c8b8d3315dbf5a86b2596053ae845f056f57d97c0eefff84da14"},
|
||||
{file = "pathable-0.4.3.tar.gz", hash = "sha256:5c869d315be50776cc8a993f3af43e0c60dc01506b399643f919034ebf4cdcab"},
|
||||
@@ -2131,7 +2054,6 @@ version = "5.9.0"
|
||||
description = "Python Build Reasonableness"
|
||||
optional = false
|
||||
python-versions = ">=2.6"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pbr-5.9.0-py2.py3-none-any.whl", hash = "sha256:e547125940bcc052856ded43be8e101f63828c2d94239ffbe2b327ba3d5ccf0a"},
|
||||
{file = "pbr-5.9.0.tar.gz", hash = "sha256:e8dca2f4b43560edef58813969f52a56cef023146cbb8931626db80e6c1c4308"},
|
||||
@@ -2143,7 +2065,6 @@ version = "1.0.0"
|
||||
description = "plugin and hook calling mechanisms for python"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
|
||||
{file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
|
||||
@@ -2159,7 +2080,6 @@ version = "3.11"
|
||||
description = "Python Lex & Yacc"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"},
|
||||
{file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"},
|
||||
@@ -2171,7 +2091,6 @@ version = "0.14.1"
|
||||
description = "Python client for the Prometheus monitoring system."
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "prometheus_client-0.14.1-py3-none-any.whl", hash = "sha256:522fded625282822a89e2773452f42df14b5a8e84a86433e3f8a189c1d54dc01"},
|
||||
{file = "prometheus_client-0.14.1.tar.gz", hash = "sha256:5459c427624961076277fdc6dc50540e2bacb98eebde99886e59ec55ed92093a"},
|
||||
@@ -2186,7 +2105,6 @@ version = "0.2.0"
|
||||
description = "Accelerated property cache"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "propcache-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c5869b8fd70b81835a6f187c5fdbe67917a04d7e52b6e7cc4e5fe39d55c39d58"},
|
||||
{file = "propcache-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:952e0d9d07609d9c5be361f33b0d6d650cd2bae393aabb11d9b719364521984b"},
|
||||
@@ -2294,7 +2212,6 @@ version = "5.9.4"
|
||||
description = "Cross-platform lib for process and system monitoring in Python."
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "psutil-5.9.4-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:c1ca331af862803a42677c120aff8a814a804e09832f166f226bfd22b56feee8"},
|
||||
{file = "psutil-5.9.4-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:68908971daf802203f3d37e78d3f8831b6d1014864d7a85937941bb35f09aefe"},
|
||||
@@ -2321,7 +2238,6 @@ version = "2.9.10"
|
||||
description = "psycopg2 - Python-PostgreSQL Database Adapter"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "psycopg2-binary-2.9.10.tar.gz", hash = "sha256:4b3df0e6990aa98acda57d983942eff13d824135fe2250e6522edaa782a06de2"},
|
||||
{file = "psycopg2_binary-2.9.10-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:0ea8e3d0ae83564f2fc554955d327fa081d065c8ca5cc6d2abb643e2c9c1200f"},
|
||||
@@ -2370,7 +2286,6 @@ files = [
|
||||
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"},
|
||||
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"},
|
||||
{file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"},
|
||||
{file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"},
|
||||
{file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"},
|
||||
{file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"},
|
||||
{file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"},
|
||||
@@ -2399,7 +2314,6 @@ version = "0.5.4"
|
||||
description = "Pure Python PartiQL Parser"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "py_partiql_parser-0.5.4-py2.py3-none-any.whl", hash = "sha256:3dc4295a47da9587681a96b35c6e151886fdbd0a4acbe0d97c4c68e5f689d315"},
|
||||
{file = "py_partiql_parser-0.5.4.tar.gz", hash = "sha256:72e043919538fa63edae72fb59afc7e3fd93adbde656718a7d2b4666f23dd114"},
|
||||
@@ -2414,7 +2328,6 @@ version = "2.21"
|
||||
description = "C parser in Python"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"},
|
||||
{file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"},
|
||||
@@ -2426,7 +2339,6 @@ version = "2.10.4"
|
||||
description = "Data validation using Python type hints"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pydantic-2.10.4-py3-none-any.whl", hash = "sha256:597e135ea68be3a37552fb524bc7d0d66dcf93d395acd93a00682f1efcb8ee3d"},
|
||||
{file = "pydantic-2.10.4.tar.gz", hash = "sha256:82f12e9723da6de4fe2ba888b5971157b3be7ad914267dea8f05f82b28254f06"},
|
||||
@@ -2447,7 +2359,6 @@ version = "2.27.2"
|
||||
description = "Core functionality for Pydantic validation and serialization"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pydantic_core-2.27.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2d367ca20b2f14095a8f4fa1210f5a7b78b8a20009ecced6b12818f455b1e9fa"},
|
||||
{file = "pydantic_core-2.27.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:491a2b73db93fab69731eaee494f320faa4e093dbed776be1a829c2eb222c34c"},
|
||||
@@ -2560,7 +2471,6 @@ version = "2.4.0"
|
||||
description = "JSON Web Token implementation in Python"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "PyJWT-2.4.0-py3-none-any.whl", hash = "sha256:72d1d253f32dbd4f5c88eaf1fdc62f3a19f676ccbadb9dbc5d07e951b2b26daf"},
|
||||
{file = "PyJWT-2.4.0.tar.gz", hash = "sha256:d42908208c699b3b973cbeb01a969ba6a96c821eefb1c5bfe4c390c01d67abba"},
|
||||
@@ -2581,7 +2491,6 @@ version = "3.0.9"
|
||||
description = "pyparsing module - Classes and methods to define and execute parsing grammars"
|
||||
optional = false
|
||||
python-versions = ">=3.6.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"},
|
||||
{file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"},
|
||||
@@ -2596,7 +2505,6 @@ version = "0.18.1"
|
||||
description = "Persistent/Functional/Immutable data structures"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pyrsistent-0.18.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:df46c854f490f81210870e509818b729db4488e1f30f2a1ce1698b2295a878d1"},
|
||||
{file = "pyrsistent-0.18.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d45866ececf4a5fff8742c25722da6d4c9e180daa7b405dc0a2a2790d668c26"},
|
||||
@@ -2627,7 +2535,6 @@ version = "7.4.4"
|
||||
description = "pytest: simple powerful testing with Python"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"},
|
||||
{file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"},
|
||||
@@ -2648,7 +2555,6 @@ version = "0.21.0"
|
||||
description = "Pytest support for asyncio"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pytest-asyncio-0.21.0.tar.gz", hash = "sha256:2b38a496aef56f56b0e87557ec313e11e1ab9276fc3863f6a7be0f1d0e415e1b"},
|
||||
{file = "pytest_asyncio-0.21.0-py3-none-any.whl", hash = "sha256:f2b3366b7cd501a4056858bd39349d5af19742aed2d81660b7998b6341c7eb9c"},
|
||||
@@ -2667,7 +2573,6 @@ version = "1.0.8"
|
||||
description = "pytest-httpserver is a httpserver for pytest"
|
||||
optional = false
|
||||
python-versions = ">=3.8,<4.0"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pytest_httpserver-1.0.8-py3-none-any.whl", hash = "sha256:24cd3d9f6a0b927c7bfc400d0b3fda7442721b8267ce29942bf307b190f0bb09"},
|
||||
{file = "pytest_httpserver-1.0.8.tar.gz", hash = "sha256:e052f69bc8a9073db02484681e8e47004dd1fb3763b0ae833bd899e5895c559a"},
|
||||
@@ -2682,7 +2587,6 @@ version = "0.6.3"
|
||||
description = "It helps to use fixtures in pytest.mark.parametrize"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pytest-lazy-fixture-0.6.3.tar.gz", hash = "sha256:0e7d0c7f74ba33e6e80905e9bfd81f9d15ef9a790de97993e34213deb5ad10ac"},
|
||||
{file = "pytest_lazy_fixture-0.6.3-py3-none-any.whl", hash = "sha256:e0b379f38299ff27a653f03eaa69b08a6fd4484e46fd1c9907d984b9f9daeda6"},
|
||||
@@ -2697,7 +2601,6 @@ version = "1.1.0"
|
||||
description = "pytest plugin to run your tests in a specific order"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pytest-order-1.1.0.tar.gz", hash = "sha256:139d25b30826b78eebb42722f747eab14c44b88059d7a71d4f79d14a057269a5"},
|
||||
{file = "pytest_order-1.1.0-py3-none-any.whl", hash = "sha256:3b3730969c97900fa5cd31ecff80847680ed56b2490954565c14949ba60d9371"},
|
||||
@@ -2712,7 +2615,6 @@ version = "0.9.3"
|
||||
description = "pytest plugin for repeating tests"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pytest_repeat-0.9.3-py3-none-any.whl", hash = "sha256:26ab2df18226af9d5ce441c858f273121e92ff55f5bb311d25755b8d7abdd8ed"},
|
||||
{file = "pytest_repeat-0.9.3.tar.gz", hash = "sha256:ffd3836dfcd67bb270bec648b330e20be37d2966448c4148c4092d1e8aba8185"},
|
||||
@@ -2727,7 +2629,6 @@ version = "15.0"
|
||||
description = "pytest plugin to re-run tests to eliminate flaky failures"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pytest-rerunfailures-15.0.tar.gz", hash = "sha256:2d9ac7baf59f4c13ac730b47f6fa80e755d1ba0581da45ce30b72fb3542b4474"},
|
||||
{file = "pytest_rerunfailures-15.0-py3-none-any.whl", hash = "sha256:dd150c4795c229ef44320adc9a0c0532c51b78bb7a6843a8c53556b9a611df1a"},
|
||||
@@ -2743,7 +2644,6 @@ version = "0.8.1"
|
||||
description = "Pytest plugin which splits the test suite to equally sized sub suites based on test execution time."
|
||||
optional = false
|
||||
python-versions = ">=3.7.1,<4.0"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pytest_split-0.8.1-py3-none-any.whl", hash = "sha256:74b110ea091bd147cc1c5f9665a59506e5cedfa66f96a89fb03e4ab447c2c168"},
|
||||
{file = "pytest_split-0.8.1.tar.gz", hash = "sha256:2d88bd3dc528689a7a3f58fc12ea165c3aa62e90795e420dfad920afe5612d6d"},
|
||||
@@ -2758,7 +2658,6 @@ version = "2.1.0"
|
||||
description = "pytest plugin to abort hanging tests"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pytest-timeout-2.1.0.tar.gz", hash = "sha256:c07ca07404c612f8abbe22294b23c368e2e5104b521c1790195561f37e1ac3d9"},
|
||||
{file = "pytest_timeout-2.1.0-py3-none-any.whl", hash = "sha256:f6f50101443ce70ad325ceb4473c4255e9d74e3c7cd0ef827309dfa4c0d975c6"},
|
||||
@@ -2773,7 +2672,6 @@ version = "3.3.1"
|
||||
description = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pytest-xdist-3.3.1.tar.gz", hash = "sha256:d5ee0520eb1b7bcca50a60a518ab7a7707992812c578198f8b44fdfac78e8c93"},
|
||||
{file = "pytest_xdist-3.3.1-py3-none-any.whl", hash = "sha256:ff9daa7793569e6a68544850fd3927cd257cc03a7ef76c95e86915355e82b5f2"},
|
||||
@@ -2794,7 +2692,6 @@ version = "2.8.2"
|
||||
description = "Extensions to the standard Python datetime module"
|
||||
optional = false
|
||||
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
|
||||
{file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
|
||||
@@ -2809,7 +2706,6 @@ version = "1.0.1"
|
||||
description = "Read key-value pairs from a .env file and set them as environment variables"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"},
|
||||
{file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"},
|
||||
@@ -2824,7 +2720,6 @@ version = "2024.1"
|
||||
description = "World timezone definitions, modern and historical"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"},
|
||||
{file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"},
|
||||
@@ -2836,8 +2731,6 @@ version = "308"
|
||||
description = "Python for Window Extensions"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
markers = "sys_platform == \"win32\""
|
||||
files = [
|
||||
{file = "pywin32-308-cp310-cp310-win32.whl", hash = "sha256:796ff4426437896550d2981b9c2ac0ffd75238ad9ea2d3bfa67a1abd546d262e"},
|
||||
{file = "pywin32-308-cp310-cp310-win_amd64.whl", hash = "sha256:4fc888c59b3c0bef905ce7eb7e2106a07712015ea1c8234b703a088d46110e8e"},
|
||||
@@ -2865,7 +2758,6 @@ version = "6.0.2"
|
||||
description = "YAML parser and emitter for Python"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"},
|
||||
{file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"},
|
||||
@@ -2928,7 +2820,6 @@ version = "2024.4.28"
|
||||
description = "Alternative regular expression module, to replace re."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "regex-2024.4.28-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd196d056b40af073d95a2879678585f0b74ad35190fac04ca67954c582c6b61"},
|
||||
{file = "regex-2024.4.28-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8bb381f777351bd534462f63e1c6afb10a7caa9fa2a421ae22c26e796fe31b1f"},
|
||||
@@ -3017,7 +2908,6 @@ version = "2.32.3"
|
||||
description = "Python HTTP for Humans."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"},
|
||||
{file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"},
|
||||
@@ -3039,7 +2929,6 @@ version = "0.25.3"
|
||||
description = "A utility library for mocking out the `requests` Python library."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "responses-0.25.3-py3-none-any.whl", hash = "sha256:521efcbc82081ab8daa588e08f7e8a64ce79b91c39f6e62199b19159bea7dbcb"},
|
||||
{file = "responses-0.25.3.tar.gz", hash = "sha256:617b9247abd9ae28313d57a75880422d55ec63c29d33d629697590a034358dba"},
|
||||
@@ -3059,7 +2948,6 @@ version = "0.1.4"
|
||||
description = "A pure python RFC3339 validator"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "rfc3339_validator-0.1.4-py2.py3-none-any.whl", hash = "sha256:24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa"},
|
||||
{file = "rfc3339_validator-0.1.4.tar.gz", hash = "sha256:138a2abdf93304ad60530167e51d2dfb9549521a836871b88d7f4695d0022f6b"},
|
||||
@@ -3074,7 +2962,6 @@ version = "0.7.0"
|
||||
description = "An extremely fast Python linter and code formatter, written in Rust."
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["dev"]
|
||||
files = [
|
||||
{file = "ruff-0.7.0-py3-none-linux_armv6l.whl", hash = "sha256:0cdf20c2b6ff98e37df47b2b0bd3a34aaa155f59a11182c1303cce79be715628"},
|
||||
{file = "ruff-0.7.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:496494d350c7fdeb36ca4ef1c9f21d80d182423718782222c29b3e72b3512737"},
|
||||
@@ -3102,7 +2989,6 @@ version = "0.10.0"
|
||||
description = "An Amazon S3 Transfer Manager"
|
||||
optional = false
|
||||
python-versions = ">= 3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "s3transfer-0.10.0-py3-none-any.whl", hash = "sha256:3cdb40f5cfa6966e812209d0994f2a4709b561c88e90cf00c2696d2df4e56b2e"},
|
||||
{file = "s3transfer-0.10.0.tar.gz", hash = "sha256:d0c8bbf672d5eebbe4e57945e23b972d963f07d82f661cabf678a5c88831595b"},
|
||||
@@ -3120,7 +3006,6 @@ version = "1.0.4"
|
||||
description = "Classes implementing the SARIF 2.1.0 object model."
|
||||
optional = false
|
||||
python-versions = ">= 2.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "sarif_om-1.0.4-py3-none-any.whl", hash = "sha256:539ef47a662329b1c8502388ad92457425e95dc0aaaf995fe46f4984c4771911"},
|
||||
{file = "sarif_om-1.0.4.tar.gz", hash = "sha256:cd5f416b3083e00d402a92e449a7ff67af46f11241073eea0461802a3b5aef98"},
|
||||
@@ -3136,7 +3021,6 @@ version = "70.0.0"
|
||||
description = "Easily download, build, install, upgrade, and uninstall Python packages"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "setuptools-70.0.0-py3-none-any.whl", hash = "sha256:54faa7f2e8d2d11bcd2c07bed282eef1046b5c080d1c32add737d7b5817b1ad4"},
|
||||
{file = "setuptools-70.0.0.tar.gz", hash = "sha256:f211a66637b8fa059bb28183da127d4e86396c991a942b028c6650d4319c3fd0"},
|
||||
@@ -3152,7 +3036,6 @@ version = "1.16.0"
|
||||
description = "Python 2 and 3 compatibility utilities"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
|
||||
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
|
||||
@@ -3164,7 +3047,6 @@ version = "1.3.0"
|
||||
description = "Sniff out which async library your code is running under"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"},
|
||||
{file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"},
|
||||
@@ -3176,7 +3058,6 @@ version = "1.12"
|
||||
description = "Computer algebra system (CAS) in Python"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"},
|
||||
{file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"},
|
||||
@@ -3191,7 +3072,6 @@ version = "4.9.0"
|
||||
description = "Python library for throwaway instances of anything that can run in a Docker container"
|
||||
optional = false
|
||||
python-versions = "<4.0,>=3.9"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "testcontainers-4.9.0-py3-none-any.whl", hash = "sha256:c6fee929990972c40bf6b91b7072c94064ff3649b405a14fde0274c8b2479d32"},
|
||||
{file = "testcontainers-4.9.0.tar.gz", hash = "sha256:2cd6af070109ff68c1ab5389dc89c86c2dc3ab30a21ca734b2cb8f0f80ad479e"},
|
||||
@@ -3245,7 +3125,6 @@ version = "0.10.2"
|
||||
description = "Python Library for Tom's Obvious, Minimal Language"
|
||||
optional = false
|
||||
python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
|
||||
{file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
|
||||
@@ -3257,7 +3136,6 @@ version = "1.5.0.20240925"
|
||||
description = "Typing stubs for jwcrypto"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "types-jwcrypto-1.5.0.20240925.tar.gz", hash = "sha256:50e17b790378c96239344476c7bd13b52d0c7eeb6d16c2d53723e48cc6bbf4fe"},
|
||||
{file = "types_jwcrypto-1.5.0.20240925-py3-none-any.whl", hash = "sha256:2d12a2d528240d326075e896aafec7056b9136bf3207fa6ccf3fcb8fbf9e11a1"},
|
||||
@@ -3272,7 +3150,6 @@ version = "5.9.5.12"
|
||||
description = "Typing stubs for psutil"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "types-psutil-5.9.5.12.tar.gz", hash = "sha256:61a91679d3fe737250013b624dca09375e7cc3ad77dcc734553746c429c02aca"},
|
||||
{file = "types_psutil-5.9.5.12-py3-none-any.whl", hash = "sha256:e9a147b8561235c6afcce5aa1adb973fad9ab2c50cf89820697687f53510358f"},
|
||||
@@ -3284,7 +3161,6 @@ version = "2.9.21.20241019"
|
||||
description = "Typing stubs for psycopg2"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "types-psycopg2-2.9.21.20241019.tar.gz", hash = "sha256:bca89b988d2ebd19bcd08b177d22a877ea8b841decb10ed130afcf39404612fa"},
|
||||
{file = "types_psycopg2-2.9.21.20241019-py3-none-any.whl", hash = "sha256:44d091e67732d16a941baae48cd7b53bf91911bc36888652447cf1ef0c1fb3f6"},
|
||||
@@ -3296,7 +3172,6 @@ version = "0.6.3.3"
|
||||
description = "Typing stubs for pytest-lazy-fixture"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "types-pytest-lazy-fixture-0.6.3.3.tar.gz", hash = "sha256:2ef79d66bcde0e50acdac8dc55074b9ae0d4cfaeabdd638f5522f4cac7c8a2c7"},
|
||||
{file = "types_pytest_lazy_fixture-0.6.3.3-py3-none-any.whl", hash = "sha256:a56a55649147ff960ff79d4b2c781a4f769351abc1876873f3116d0bd0c96353"},
|
||||
@@ -3308,7 +3183,6 @@ version = "6.0.12.20240917"
|
||||
description = "Typing stubs for PyYAML"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "types-PyYAML-6.0.12.20240917.tar.gz", hash = "sha256:d1405a86f9576682234ef83bcb4e6fff7c9305c8b1fbad5e0bcd4f7dbdc9c587"},
|
||||
{file = "types_PyYAML-6.0.12.20240917-py3-none-any.whl", hash = "sha256:392b267f1c0fe6022952462bf5d6523f31e37f6cea49b14cee7ad634b6301570"},
|
||||
@@ -3320,7 +3194,6 @@ version = "2.31.0.0"
|
||||
description = "Typing stubs for requests"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "types-requests-2.31.0.0.tar.gz", hash = "sha256:c1c29d20ab8d84dff468d7febfe8e0cb0b4664543221b386605e14672b44ea25"},
|
||||
{file = "types_requests-2.31.0.0-py3-none-any.whl", hash = "sha256:7c5cea7940f8e92ec560bbc468f65bf684aa3dcf0554a6f8c4710f5f708dc598"},
|
||||
@@ -3335,7 +3208,6 @@ version = "0.6.0.post3"
|
||||
description = "Type annotations and code completion for s3transfer"
|
||||
optional = false
|
||||
python-versions = ">=3.7,<4.0"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "types-s3transfer-0.6.0.post3.tar.gz", hash = "sha256:92c3704e5d041202bfb5ddb79d083fd1a02de2c5dfec6a91576823e6b5c93993"},
|
||||
{file = "types_s3transfer-0.6.0.post3-py3-none-any.whl", hash = "sha256:eedc5117275565b3c83662c0ccc81662a34da5dda8bd502b89d296b6d5cb091d"},
|
||||
@@ -3347,7 +3219,6 @@ version = "0.10.8.6"
|
||||
description = "Typing stubs for toml"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "types-toml-0.10.8.6.tar.gz", hash = "sha256:6d3ac79e36c9ee593c5d4fb33a50cca0e3adceb6ef5cff8b8e5aef67b4c4aaf2"},
|
||||
{file = "types_toml-0.10.8.6-py3-none-any.whl", hash = "sha256:de7b2bb1831d6f7a4b554671ffe5875e729753496961b3e9b202745e4955dafa"},
|
||||
@@ -3359,7 +3230,6 @@ version = "1.26.17"
|
||||
description = "Typing stubs for urllib3"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "types-urllib3-1.26.17.tar.gz", hash = "sha256:73fd274524c3fc7cd8cd9ceb0cb67ed99b45f9cb2831013e46d50c1451044800"},
|
||||
{file = "types_urllib3-1.26.17-py3-none-any.whl", hash = "sha256:0d027fcd27dbb3cb532453b4d977e05bc1e13aefd70519866af211b3003d895d"},
|
||||
@@ -3371,7 +3241,6 @@ version = "4.12.2"
|
||||
description = "Backported and Experimental Type Hints for Python 3.8+"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main", "dev"]
|
||||
files = [
|
||||
{file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
|
||||
{file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
|
||||
@@ -3383,7 +3252,6 @@ version = "1.26.19"
|
||||
description = "HTTP library with thread-safe connection pooling, file post, and more."
|
||||
optional = false
|
||||
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "urllib3-1.26.19-py2.py3-none-any.whl", hash = "sha256:37a0344459b199fce0e80b0d3569837ec6b6937435c5244e7fd73fa6006830f3"},
|
||||
{file = "urllib3-1.26.19.tar.gz", hash = "sha256:3e3d753a8618b86d7de333b4223005f68720bcd6a7d2bcb9fbd2229ec7c1e429"},
|
||||
@@ -3400,7 +3268,6 @@ version = "12.0"
|
||||
description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "websockets-12.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d554236b2a2006e0ce16315c16eaa0d628dab009c33b63ea03f41c6107958374"},
|
||||
{file = "websockets-12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2d225bb6886591b1746b17c0573e29804619c8f755b5598d875bb4235ea639be"},
|
||||
@@ -3482,7 +3349,6 @@ version = "3.0.6"
|
||||
description = "The comprehensive WSGI web application library."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "werkzeug-3.0.6-py3-none-any.whl", hash = "sha256:1bc0c2310d2fbb07b1dd1105eba2f7af72f322e1e455f2f93c993bee8c8a5f17"},
|
||||
{file = "werkzeug-3.0.6.tar.gz", hash = "sha256:a8dd59d4de28ca70471a34cba79bed5f7ef2e036a76b3ab0835474246eb41f8d"},
|
||||
@@ -3500,7 +3366,6 @@ version = "1.14.1"
|
||||
description = "Module for decorators, wrappers and monkey patching."
|
||||
optional = false
|
||||
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "wrapt-1.14.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:1b376b3f4896e7930f1f772ac4b064ac12598d1c38d04907e696cc4d794b43d3"},
|
||||
{file = "wrapt-1.14.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:903500616422a40a98a5a3c4ff4ed9d0066f3b4c951fa286018ecdf0750194ef"},
|
||||
@@ -3521,16 +3386,6 @@ files = [
|
||||
{file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"},
|
||||
{file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"},
|
||||
{file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"},
|
||||
{file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"},
|
||||
{file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"},
|
||||
{file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"},
|
||||
@@ -3584,7 +3439,6 @@ version = "0.13.0"
|
||||
description = "Makes working with XML feel like you are working with JSON"
|
||||
optional = false
|
||||
python-versions = ">=3.4"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "xmltodict-0.13.0-py2.py3-none-any.whl", hash = "sha256:aa89e8fd76320154a40d19a0df04a4695fb9dc5ba977cbb68ab3e4eb225e7852"},
|
||||
{file = "xmltodict-0.13.0.tar.gz", hash = "sha256:341595a488e3e01a85a9d8911d8912fd922ede5fecc4dce437eb4b6c8d037e56"},
|
||||
@@ -3596,7 +3450,6 @@ version = "1.17.2"
|
||||
description = "Yet another URL library"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "yarl-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:93771146ef048b34201bfa382c2bf74c524980870bb278e6df515efaf93699ff"},
|
||||
{file = "yarl-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8281db240a1616af2f9c5f71d355057e73a1409c4648c8949901396dc0a3c151"},
|
||||
@@ -3693,7 +3546,6 @@ version = "0.23.0"
|
||||
description = "Zstandard bindings for Python"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "zstandard-0.23.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bf0a05b6059c0528477fba9054d09179beb63744355cab9f38059548fedd46a9"},
|
||||
{file = "zstandard-0.23.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fc9ca1c9718cb3b06634c7c8dec57d24e9438b2aa9a0f02b8bb36bf478538880"},
|
||||
@@ -3801,6 +3653,6 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\
|
||||
cffi = ["cffi (>=1.11)"]
|
||||
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.11"
|
||||
content-hash = "4dc3165fe22c0e0f7a030ea0f8a680ae2ff74561d8658c393abbe9112caaf5d7"
|
||||
content-hash = "e6904aca09abc6c805604b21a5702a97e0056406f9ec7469b091d35ee10a6b16"
|
||||
|
||||
@@ -43,7 +43,7 @@ websockets = "^12.0"
|
||||
clickhouse-connect = "^0.7.16"
|
||||
kafka-python = "^2.0.2"
|
||||
jwcrypto = "^1.5.6"
|
||||
h2 = {git = "https://github.com/python-hyper/h2"}
|
||||
h2 = "^4.1.0"
|
||||
types-jwcrypto = "^1.5.0.20240925"
|
||||
pyyaml = "^6.0.2"
|
||||
types-pyyaml = "^6.0.12.20240917"
|
||||
@@ -94,7 +94,6 @@ target-version = "py311"
|
||||
extend-exclude = [
|
||||
"vendor/",
|
||||
"target/",
|
||||
"test_runner/stubs/", # Autogenerated by mypy's stubgen
|
||||
]
|
||||
line-length = 100 # this setting is rather guidance, it won't fail if it can't make the shorter
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
//! united.
|
||||
|
||||
use reqwest::{IntoUrl, Method, StatusCode};
|
||||
use safekeeper_api::models::{TimelineCreateRequest, TimelineStatus};
|
||||
use safekeeper_api::models::TimelineStatus;
|
||||
use std::error::Error as _;
|
||||
use utils::{
|
||||
http::error::HttpErrorBody,
|
||||
@@ -76,28 +76,6 @@ impl Client {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn create_timeline(&self, req: &TimelineCreateRequest) -> Result<TimelineStatus> {
|
||||
let uri = format!(
|
||||
"{}/v1/tenant/{}/timeline/{}",
|
||||
self.mgmt_api_endpoint, req.tenant_id, req.timeline_id
|
||||
);
|
||||
let resp = self.post(&uri, req).await?;
|
||||
resp.json().await.map_err(Error::ReceiveBody)
|
||||
}
|
||||
|
||||
pub async fn delete_timeline(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
) -> Result<TimelineStatus> {
|
||||
let uri = format!(
|
||||
"{}/v1/tenant/{}/timeline/{}",
|
||||
self.mgmt_api_endpoint, tenant_id, timeline_id
|
||||
);
|
||||
let resp = self.request(Method::DELETE, &uri, ()).await?;
|
||||
resp.json().await.map_err(Error::ReceiveBody)
|
||||
}
|
||||
|
||||
pub async fn timeline_status(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
@@ -124,19 +102,6 @@ impl Client {
|
||||
self.get(&uri).await
|
||||
}
|
||||
|
||||
pub async fn utilization(&self) -> Result<reqwest::Response> {
|
||||
let uri = format!("{}/v1/utilization/", self.mgmt_api_endpoint);
|
||||
self.get(&uri).await
|
||||
}
|
||||
|
||||
async fn post<B: serde::Serialize, U: IntoUrl>(
|
||||
&self,
|
||||
uri: U,
|
||||
body: B,
|
||||
) -> Result<reqwest::Response> {
|
||||
self.request(Method::POST, uri, body).await
|
||||
}
|
||||
|
||||
async fn get<U: IntoUrl>(&self, uri: U) -> Result<reqwest::Response> {
|
||||
self.request(Method::GET, uri, ()).await
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user