Mirror of https://github.com/neondatabase/neon.git
Synced 2026-01-21 12:22:56 +00:00

Compare commits: conn_pools ... enable_v17

16 Commits (SHA1):

- dfe815a032
- 8c6a83659a
- 0151deb76a
- 461a680d64
- 3de2add040
- aa6500a64e
- 9968aec2c5
- ad9800aa18
- 29e032d326
- d754a70802
- 278fe5e736
- a72919e0ec
- 5a195605ad
- 8d64d9cca4
- 321014d37a
- e410e794d6
@@ -5,7 +5,9 @@
!Cargo.toml
!Makefile
!rust-toolchain.toml
!scripts/combine_control_files.py
!scripts/ninstall.sh
!vm-cgconfig.conf
!docker-compose/run-tests.sh

# Directories
@@ -15,12 +17,15 @@
!compute_tools/
!control_plane/
!libs/
!neon_local/
!pageserver/
!patches/
!pgxn/
!proxy/
!storage_scrubber/
!safekeeper/
!storage_broker/
!storage_controller/
!trace/
!vendor/postgres-*/
!workspace_hack/
41 .github/workflows/report-workflow-stats.yml (vendored)
@@ -1,41 +0,0 @@
name: Report Workflow Stats

on:
  workflow_run:
    workflows:
      - Add `external` label to issues and PRs created by external users
      - Benchmarking
      - Build and Test
      - Build and Test Locally
      - Build build-tools image
      - Check Permissions
      - Check build-tools image
      - Check neon with extra platform builds
      - Cloud Regression Test
      - Create Release Branch
      - Handle `approved-for-ci-run` label
      - Lint GitHub Workflows
      - Notify Slack channel about upcoming release
      - Periodic pagebench performance test on dedicated EC2 machine in eu-central-1 region
      - Pin build-tools image
      - Prepare benchmarking databases by restoring dumps
      - Push images to ACR
      - Test Postgres client libraries
      - Trigger E2E Tests
      - cleanup caches by a branch
    types: [completed]

jobs:
  gh-workflow-stats:
    name: Github Workflow Stats
    runs-on: ubuntu-22.04
    permissions:
      actions: read
    steps:
      - name: Export GH Workflow Stats
        uses: fedordikarev/gh-workflow-stats-action@v0.1.2
        with:
          DB_URI: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
          DB_TABLE: "gh_workflow_stats_neon"
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GH_RUN_ID: ${{ github.event.workflow_run.id }}
@@ -1,6 +1,5 @@
/compute_tools/ @neondatabase/control-plane @neondatabase/compute
/storage_controller @neondatabase/storage
/storage_scrubber @neondatabase/storage
/libs/pageserver_api/ @neondatabase/storage
/libs/postgres_ffi/ @neondatabase/compute @neondatabase/storage
/libs/remote_storage/ @neondatabase/storage
106 Cargo.lock (generated)
@@ -668,20 +668,19 @@ dependencies = [

[[package]]
name = "axum"
version = "0.7.5"
version = "0.6.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf"
checksum = "3b829e4e32b91e643de6eafe82b1d90675f5874230191a4ffbc1b336dec4d6bf"
dependencies = [
"async-trait",
"axum-core",
"axum-core 0.3.4",
"base64 0.21.1",
"bitflags 1.3.2",
"bytes",
"futures-util",
"http 1.1.0",
"http-body 1.0.0",
"http-body-util",
"hyper 1.4.1",
"hyper-util",
"http 0.2.9",
"http-body 0.4.5",
"hyper 0.14.30",
"itoa",
"matchit 0.7.0",
"memchr",
@@ -694,13 +693,56 @@ dependencies = [
"serde_path_to_error",
"serde_urlencoded",
"sha1",
"sync_wrapper 1.0.1",
"sync_wrapper 0.1.2",
"tokio",
"tokio-tungstenite",
"tower",
"tower-layer",
"tower-service",
"tracing",
]

[[package]]
name = "axum"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf"
dependencies = [
"async-trait",
"axum-core 0.4.5",
"bytes",
"futures-util",
"http 1.1.0",
"http-body 1.0.0",
"http-body-util",
"itoa",
"matchit 0.7.0",
"memchr",
"mime",
"percent-encoding",
"pin-project-lite",
"rustversion",
"serde",
"sync_wrapper 1.0.1",
"tower",
"tower-layer",
"tower-service",
]

[[package]]
name = "axum-core"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c"
dependencies = [
"async-trait",
"bytes",
"futures-util",
"http 0.2.9",
"http-body 0.4.5",
"mime",
"rustversion",
"tower-layer",
"tower-service",
]

[[package]]
@@ -721,7 +763,6 @@ dependencies = [
"sync_wrapper 1.0.1",
"tower-layer",
"tower-service",
"tracing",
]

[[package]]
@@ -1220,7 +1261,6 @@ version = "0.1.0"
dependencies = [
"anyhow",
"bytes",
"camino",
"cfg-if",
"chrono",
"clap",
@@ -1820,7 +1860,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47"
dependencies = [
"base16ct 0.2.0",
"base64ct",
"crypto-bigint 0.5.5",
"digest",
"ff 0.13.0",
@@ -1830,8 +1869,6 @@ dependencies = [
"pkcs8 0.10.2",
"rand_core 0.6.4",
"sec1 0.7.3",
"serde_json",
"serdect",
"subtle",
"zeroize",
]
@@ -4040,8 +4077,6 @@ dependencies = [
"bytes",
"fallible-iterator",
"postgres-protocol",
"serde",
"serde_json",
]

[[package]]
@@ -5261,7 +5296,6 @@ dependencies = [
"der 0.7.8",
"generic-array",
"pkcs8 0.10.2",
"serdect",
"subtle",
"zeroize",
]
@@ -5516,16 +5550,6 @@ dependencies = [
"syn 2.0.52",
]

[[package]]
name = "serdect"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a84f14a19e9a014bb9f4512488d9829a68e04ecabffb0f9904cd1ace94598177"
dependencies = [
"base16ct 0.2.0",
"serde",
]

[[package]]
name = "sha1"
version = "0.10.5"
@@ -6306,9 +6330,9 @@ dependencies = [

[[package]]
name = "tokio-tungstenite"
version = "0.21.0"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c83b561d025642014097b66e6c1bb422783339e0909e4429cde4749d1990bc38"
checksum = "2b2dbec703c26b00d74844519606ef15d09a7d6857860f84ad223dec002ddea2"
dependencies = [
"futures-util",
"log",
@@ -6375,7 +6399,7 @@ checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
dependencies = [
"async-stream",
"async-trait",
"axum",
"axum 0.7.5",
"base64 0.22.1",
"bytes",
"h2 0.4.4",
@@ -6582,14 +6606,14 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"

[[package]]
name = "tungstenite"
version = "0.21.0"
version = "0.20.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ef1a641ea34f399a848dea702823bbecfb4c486f911735368f1f137cb8257e1"
checksum = "9e3dac10fd62eaf6617d3a904ae222845979aec67c615d1c842b4002c7666fb9"
dependencies = [
"byteorder",
"bytes",
"data-encoding",
"http 1.1.0",
"http 0.2.9",
"httparse",
"log",
"rand 0.8.5",
@@ -6817,7 +6841,7 @@ name = "vm_monitor"
version = "0.1.0"
dependencies = [
"anyhow",
"axum",
"axum 0.6.20",
"cgroups-rs",
"clap",
"futures",
@@ -7276,8 +7300,12 @@ version = "0.1.0"
dependencies = [
"ahash",
"anyhow",
"axum",
"axum-core",
"aws-config",
"aws-runtime",
"aws-sigv4",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-types",
"base64 0.21.1",
"base64ct",
"bytes",
@@ -7318,7 +7346,6 @@ dependencies = [
"num-traits",
"once_cell",
"parquet",
"postgres-types",
"prettyplease",
"proc-macro2",
"prost",
@@ -7328,6 +7355,7 @@ dependencies = [
"regex-automata 0.4.3",
"regex-syntax 0.8.2",
"reqwest 0.12.4",
"rustls 0.21.11",
"scopeguard",
"serde",
"serde_json",
@@ -7343,7 +7371,6 @@ dependencies = [
"time",
"time-macros",
"tokio",
"tokio-postgres",
"tokio-stream",
"tokio-util",
"toml_edit",
@@ -7352,6 +7379,7 @@ dependencies = [
"tracing",
"tracing-core",
"url",
"uuid",
"zeroize",
"zstd",
"zstd-safe",
10 Cargo.toml
@@ -53,7 +53,7 @@ azure_storage_blobs = { version = "0.19", default-features = false, features = [
flate2 = "1.0.26"
async-stream = "0.3"
async-trait = "0.1"
aws-config = { version = "1.5", default-features = false, features=["rustls", "sso"] }
aws-config = { version = "1.5", default-features = false, features=["rustls"] }
aws-sdk-s3 = "1.52"
aws-sdk-iam = "1.46.0"
aws-smithy-async = { version = "1.2.1", default-features = false, features=["rt-tokio"] }
@@ -61,7 +61,7 @@ aws-smithy-types = "1.2"
aws-credential-types = "1.2.0"
aws-sigv4 = { version = "1.2", features = ["sign-http"] }
aws-types = "1.3"
axum = { version = "0.7.5", features = ["ws"] }
axum = { version = "0.6.20", features = ["ws"] }
base64 = "0.13.0"
bincode = "1.3"
bindgen = "0.70"
@@ -99,10 +99,10 @@ http-types = { version = "2", default-features = false }
http-body-util = "0.1.2"
humantime = "2.1"
humantime-serde = "1.1.1"
hyper0 = { package = "hyper", version = "0.14" }
hyper = "1.4"
hyper = "0.14"
hyper_1 = { package = "hyper", version = "1.4" }
hyper-util = "0.1"
tokio-tungstenite = "0.21.0"
tokio-tungstenite = "0.20.0"
indexmap = "2"
indoc = "2"
ipnet = "2.9.0"
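The Cargo.toml hunk above keeps two major versions of hyper in the workspace at once by giving one of them a renamed package entry (`hyper0 = { package = "hyper", version = "0.14" }` on one side, `hyper_1 = { package = "hyper", version = "1.4" }` on the other). A minimal sketch of how such a renamed dependency is consumed from Rust code; the consuming crate and URI here are hypothetical, but the `extern crate ... as ...` re-export is the same trick the compute_tools lib.rs uses further down in this compare.

```rust
// Hypothetical consumer crate. Its Cargo.toml would pull in the workspace alias, e.g.:
//   hyper0 = { workspace = true, features = ["full"] }
//
// The alias becomes the crate name rustc sees, so it can be re-exported under the
// familiar name and the rest of the code stays unchanged.
extern crate hyper0 as hyper;

fn main() {
    // From here on, `hyper` refers to the 0.14 line pinned by the alias.
    let uri: hyper::Uri = "http://127.0.0.1:3080/status".parse().unwrap();
    println!("scheme = {:?}, path = {}", uri.scheme_str(), uri.path());
}
```

This lets individual crates migrate between the two majors by switching which alias they depend on, without a lockstep upgrade of the whole workspace.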
14 Makefile
@@ -168,27 +168,27 @@ postgres-check-%: postgres-%
neon-pg-ext-%: postgres-%
+@echo "Compiling neon $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install
+@echo "Compiling neon_walredo $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile install
+@echo "Compiling neon_rmgr $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_rmgr/Makefile install
+@echo "Compiling neon_test_utils $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile install
+@echo "Compiling neon_utils $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-utils-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile install

@@ -220,7 +220,7 @@ neon-pg-clean-ext-%:
walproposer-lib: neon-pg-ext-v17
+@echo "Compiling walproposer-lib"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile walproposer-lib
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
@@ -333,7 +333,7 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
# Indent pxgn/neon.
.PHONY: neon-pgindent
neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
INDENT=$(POSTGRES_INSTALL_DIR)/build/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \
@@ -58,7 +58,7 @@ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
1. Install XCode and dependencies
```
xcode-select --install
brew install protobuf openssl flex bison icu4c pkg-config m4
brew install protobuf openssl flex bison icu4c pkg-config

# add openssl to PATH, required for ed25519 keys generation in neon_local
echo 'export PATH="$(brew --prefix openssl)/bin:$PATH"' >> ~/.zshrc
@@ -17,13 +17,14 @@ RUN case $DEBIAN_FLAVOR in \
# Version-specific installs for Bullseye (PG14-PG16):
# The h3_pg extension needs a cmake 3.20+, but Debian bullseye has 3.18.
# Install newer version (3.25) from backports.
# libstdc++-10-dev is required for plv8
bullseye*) \
echo "deb http://deb.debian.org/debian bullseye-backports main" > /etc/apt/sources.list.d/bullseye-backports.list; \
VERSION_INSTALLS="cmake/bullseye-backports cmake-data/bullseye-backports"; \
VERSION_INSTALLS="cmake/bullseye-backports cmake-data/bullseye-backports libstdc++-10-dev"; \
;; \
# Version-specific installs for Bookworm (PG17):
bookworm*) \
VERSION_INSTALLS="cmake"; \
VERSION_INSTALLS="cmake libstdc++-12-dev"; \
;; \
esac && \
apt update && \
@@ -116,7 +117,7 @@ RUN apt update && \
# SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2
# and also we must check backward compatibility with older versions of PostGIS.
#
# Use new version only for v17
# To move faster, use newer versions only for v17
RUN case "${PG_VERSION}" in \
"v17") \
export SFCGAL_VERSION=1.4.1 \
@@ -164,6 +165,7 @@ RUN case "${PG_VERSION}" in \
make -j $(getconf _NPROCESSORS_ONLN) install && \
cd extensions/postgis && \
make clean && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_raster.control && \
@@ -181,27 +183,10 @@ RUN case "${PG_VERSION}" in \
cp /usr/local/pgsql/share/extension/address_standardizer.control /extensions/postgis && \
cp /usr/local/pgsql/share/extension/address_standardizer_data_us.control /extensions/postgis

# Uses versioned libraries, i.e. libpgrouting-3.4
# and may introduce function signature changes between releases
# i.e. release 3.5.0 has new signature for pg_dijkstra function
#
# Use new version only for v17
# last release v3.6.2 - Mar 30, 2024
RUN case "${PG_VERSION}" in \
"v17") \
export PGROUTING_VERSION=3.6.2 \
export PGROUTING_CHECKSUM=f4a1ed79d6f714e52548eca3bb8e5593c6745f1bde92eb5fb858efd8984dffa2 \
;; \
"v14" | "v15" | "v16") \
export PGROUTING_VERSION=3.4.2 \
export PGROUTING_CHECKSUM=cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e \
;; \
*) \
echo "unexpected PostgreSQL version" && exit 1 \
;; \
esac && \
wget https://github.com/pgRouting/pgrouting/archive/v${PGROUTING_VERSION}.tar.gz -O pgrouting.tar.gz && \
echo "${PGROUTING_CHECKSUM} pgrouting.tar.gz" | sha256sum --check && \
# not version-specific
# last release v3.6.2 - Mar 30, 2023
RUN wget https://github.com/pgRouting/pgrouting/archive/v3.6.2.tar.gz -O pgrouting.tar.gz && \
echo "f4a1ed79d6f714e52548eca3bb8e5593c6745f1bde92eb5fb858efd8984dffa2 pgrouting.tar.gz" | sha256sum --check && \
mkdir pgrouting-src && cd pgrouting-src && tar xzf ../pgrouting.tar.gz --strip-components=1 -C . && \
mkdir build && cd build && \
cmake -DCMAKE_BUILD_TYPE=Release .. && \
@@ -223,18 +208,18 @@ FROM build-deps AS plv8-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
apt update && \
RUN apt update && \
apt install --no-install-recommends -y ninja-build python3-dev libncurses5 binutils clang

RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.tar.gz && \
echo "7096c3290928561f0d4901b7a52794295dc47f6303102fae3f8e42dd575ad97d plv8.tar.gz" | sha256sum --check && \
mkdir plv8-src && cd plv8-src && tar xzf ../plv8.tar.gz --strip-components=1 -C . && \
# plv8 3.2.3 supports v17
# last release v3.2.3 - Sep 7, 2024
# clone the repo instead of downloading the release tarball because plv8 has submodule dependencies
# and the release tarball doesn't include them
ENV PLV8_TAG=v3.2.3
RUN set -e \
&& git clone --recurse-submodules --depth 1 --branch ${PLV8_TAG} https://github.com/plv8/plv8.git plv8-src && \
tar -czf plv8.tar.gz --exclude .git plv8-src && \
cd plv8-src && \
# generate and copy upgrade scripts
mkdir -p upgrade && ./generate_upgrade.sh 3.1.10 && \
cp upgrade/* /usr/local/pgsql/share/extension/ && \
@@ -244,8 +229,10 @@ RUN case "${PG_VERSION}" in "v17") \
find /usr/local/pgsql/ -name "plv8-*.so" | xargs strip && \
# don't break computes with installed old version of plv8
cd /usr/local/pgsql/lib/ && \
ln -s plv8-3.1.10.so plv8-3.1.5.so && \
ln -s plv8-3.1.10.so plv8-3.1.8.so && \
# TODO test this case !!
ln -s plv8-3.2.3.so plv8-3.1.8.so && \
ln -s plv8-3.2.3.so plv8-3.1.5.so && \
ln -s plv8-3.2.3.so plv8-3.1.10.so && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plcoffee.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plls.control
@@ -323,6 +310,8 @@ COPY compute/patches/pgvector.patch /pgvector.patch
# By default, pgvector Makefile uses `-march=native`. We don't want that,
# because we build the images on different machines than where we run them.
# Pass OPTFLAGS="" to remove it.
#
# v17 is not supported yet because of upstream issue https://github.com/pgvector/pgvector/issues/669
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
@@ -362,11 +351,10 @@ FROM build-deps AS hypopg-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypopg.tar.gz && \
echo "0821011743083226fc9b813c1f2ef5897a91901b57b6bea85a78e466187c6819 hypopg.tar.gz" | sha256sum --check && \
# HypoPG 1.4.1 supports v17
# last release 1.4.1 - Apr 28, 2024
RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.1.tar.gz -O hypopg.tar.gz && \
echo "9afe6357fd389d8d33fad81703038ce520b09275ec00153c6c89282bcdedd6bc hypopg.tar.gz" | sha256sum --check && \
mkdir hypopg-src && cd hypopg-src && tar xzf ../hypopg.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -402,7 +390,8 @@ ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

COPY compute/patches/rum.patch /rum.patch

# maybe version-specific
# last release 1.3.13 - Sep 19, 2022
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
@@ -424,11 +413,10 @@ FROM build-deps AS pgtap-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
echo "9c7c3de67ea41638e14f06da5da57bac6f5bd03fea05c165a0ec862205a5c052 pgtap.tar.gz" | sha256sum --check && \
# pgtap 1.3.3 supports v17
# last release v1.3.3 - Apr 8, 2024
RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.3.3.tar.gz -O pgtap.tar.gz && \
echo "325ea79d0d2515bce96bce43f6823dcd3effbd6c54cb2a4d6c2384fffa3a14c7 pgtap.tar.gz" | sha256sum --check && \
mkdir pgtap-src && cd pgtap-src && tar xzf ../pgtap.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -483,7 +471,7 @@ ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

# not version-specific
# last release v2.18 - Aug 29, 2023
# last release v2.18 - Sep 15, 2023
RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \
echo "e2f55a6f4c4ab95ee4f1b4a2b73280258c5136b161fe9d059559556079694f0e hll.tar.gz" | sha256sum --check && \
mkdir hll-src && cd hll-src && tar xzf ../hll.tar.gz --strip-components=1 -C . && \
@@ -501,11 +489,10 @@ FROM build-deps AS plpgsql-check-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.5.3.tar.gz -O plpgsql_check.tar.gz && \
echo "6631ec3e7fb3769eaaf56e3dfedb829aa761abf163d13dba354b4c218508e1c0 plpgsql_check.tar.gz" | sha256sum --check && \
# plpgsql_check v2.7.11 supports v17
# last release v2.7.11 - Sep 16, 2024
RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.7.11.tar.gz -O plpgsql_check.tar.gz && \
echo "208933f8dbe8e0d2628eb3851e9f52e6892b8e280c63700c0f1ce7883625d172 plpgsql_check.tar.gz" | sha256sum --check && \
mkdir plpgsql_check-src && cd plpgsql_check-src && tar xzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
@@ -523,6 +510,8 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ARG PG_VERSION
ENV PATH="/usr/local/pgsql/bin:$PATH"

# v17 is not supported yet. TimescaleDB 2.17.0 will support it
# https://github.com/timescale/timescaledb/issues/6949
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
@@ -557,10 +546,9 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ARG PG_VERSION
ENV PATH="/usr/local/pgsql/bin:$PATH"

RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
case "${PG_VERSION}" in \
# version-specific, has separate releases for each version
# TODO check minor upgrades for v14-16
RUN case "${PG_VERSION}" in \
"v14") \
export PG_HINT_PLAN_VERSION=14_1_4_1 \
export PG_HINT_PLAN_CHECKSUM=c3501becf70ead27f70626bce80ea401ceac6a77e2083ee5f3ff1f1444ec1ad1 \
@@ -574,7 +562,8 @@ RUN case "${PG_VERSION}" in "v17") \
export PG_HINT_PLAN_CHECKSUM=fc85a9212e7d2819d4ae4ac75817481101833c3cfa9f0fe1f980984e12347d00 \
;; \
"v17") \
echo "TODO: PG17 pg_hint_plan support" && exit 0 \
export PG_HINT_PLAN_VERSION=17_1_7_0 \
export PG_HINT_PLAN_CHECKSUM=06dd306328c67a4248f48403c50444f30959fb61ebe963248dbc2afb396fe600 \
;; \
*) \
echo "Export the valid PG_HINT_PLAN_VERSION variable" && exit 1 \
@@ -598,6 +587,9 @@ FROM build-deps AS pg-cron-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

# 1.6.4 available, supports v17
# FIXME: Why is it here? We don't support pg_cron...
# !Do not remove! We set it in shared_preload_libraries and computes will fail to start if library is not found.
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
@@ -619,23 +611,21 @@ FROM build-deps AS rdkit-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
apt-get update && \
RUN apt-get update && \
apt-get install --no-install-recommends -y \
libboost-iostreams1.74-dev \
libboost-regex1.74-dev \
libboost-serialization1.74-dev \
libboost-system1.74-dev \
#TODO check what exactly is needed
libboost-all-dev \
libeigen3-dev

# rdkit Release_2024_09_1 supports v17
# last release Release_2024_09_1 - Sep 27, 2024
ENV PATH="/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
echo "bdbf9a2e6988526bfeb8c56ce3cdfe2998d60ac289078e2215374288185e8c8d rdkit.tar.gz" | sha256sum --check && \
RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2024_09_1.tar.gz -O rdkit.tar.gz && \
echo "034c00d6e9de323506834da03400761ed8c3721095114369d06805409747a60f rdkit.tar.gz" | sha256sum --check && \
mkdir rdkit-src && cd rdkit-src && tar xzf ../rdkit.tar.gz --strip-components=1 -C . && \
cmake \
-D RDK_BUILD_CAIRO_SUPPORT=OFF \
@@ -674,12 +664,11 @@ FROM build-deps AS pg-uuidv7-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

# not version-specific
# last release v1.5.0 - Mar 21, 2024
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \
RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.5.0.tar.gz -O pg_uuidv7.tar.gz && \
echo "5f53e5ce0fa4e01c1d489f736478224571d4b2b88eaf63186265e38f0b8d3d78 pg_uuidv7.tar.gz" | sha256sum --check && \
mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -715,27 +704,11 @@ FROM build-deps AS pg-semver-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

# Release 0.40.0 breaks backward compatibility with previous versions
# see release note https://github.com/theory/pg-semver/releases/tag/v0.40.0
# Use new version only for v17
#
# not version-specific
# last release v0.40.0 - Jul 22, 2024
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in \
"v17") \
export SEMVER_VERSION=0.40.0 \
export SEMVER_CHECKSUM=3e50bcc29a0e2e481e7b6d2bc937cadc5f5869f55d983b5a1aafeb49f5425cfc \
;; \
"v14" | "v15" | "v16") \
export SEMVER_VERSION=0.32.1 \
export SEMVER_CHECKSUM=fbdaf7512026d62eec03fad8687c15ed509b6ba395bff140acd63d2e4fbe25d7 \
;; \
*) \
echo "unexpected PostgreSQL version" && exit 1 \
;; \
esac && \
wget https://github.com/theory/pg-semver/archive/refs/tags/v${SEMVER_VERSION}.tar.gz -O pg_semver.tar.gz && \
echo "${SEMVER_CHECKSUM} pg_semver.tar.gz" | sha256sum --check && \
RUN wget https://github.com/theory/pg-semver/archive/refs/tags/v0.40.0.tar.gz -O pg_semver.tar.gz && \
echo "3e50bcc29a0e2e481e7b6d2bc937cadc5f5869f55d983b5a1aafeb49f5425cfc pg_semver.tar.gz" | sha256sum --check && \
mkdir pg_semver-src && cd pg_semver-src && tar xzf ../pg_semver.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -750,6 +723,8 @@ RUN case "${PG_VERSION}" in \
FROM build-deps AS pg-embedding-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

# This is our extension, support stopped in favor of pgvector
# TODO: deprecate it
ARG PG_VERSION
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in \
@@ -776,6 +751,9 @@ FROM build-deps AS pg-anon-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

# This was experimental extension, that never got to real production.
# TODO deprecate it
# !Do not remove! It can be present in shared_preload_libraries and compute will fail to start if library is not found.
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "postgresql_anonymizer does not yet support PG17" && exit 0;; \
@@ -930,6 +908,9 @@ RUN case "${PG_VERSION}" in "v17") \
mkdir pg_session_jwt-src && cd pg_session_jwt-src && tar xzf ../pg_session_jwt.tar.gz --strip-components=1 -C . && \
sed -i 's/pgrx = "=0.11.3"/pgrx = { version = "=0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
cargo pgrx install --release
# it's needed to enable extension because it uses untrusted C language
# sed -i 's/superuser = false/superuser = true/g' /usr/local/pgsql/share/extension/pg_session_jwt.control && \
# echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_session_jwt.control

#########################################################################################
#
@@ -942,13 +923,12 @@ FROM build-deps AS wal2json-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

# wal2json wal2json_2_6 supports v17
# last release wal2json_2_6 - Apr 25, 2024
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "We'll need to update wal2json to 2.6+ for pg17 support" && exit 0;; \
esac && \
wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
echo "b516653575541cf221b99cf3f8be9b6821f6dbcfc125675c85f35090f824f00e wal2json_2_5.tar.gz" | sha256sum --check && \
mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \
RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_6.tar.gz -O wal2json.tar.gz && \
echo "18b4bdec28c74a8fc98a11c72de38378a760327ef8e5e42e975b0029eb96ba0d wal2json.tar.gz" | sha256sum --check && \
mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install

@@ -962,12 +942,11 @@ FROM build-deps AS pg-ivm-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

# pg_ivm v1.9 supports v17
# last release v1.9 - Jul 31
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "We'll need to update pg_ivm to 1.9+ for pg17 support" && exit 0;; \
esac && \
wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \
echo "ebfde04f99203c7be4b0e873f91104090e2e83e5429c32ac242d00f334224d5e pg_ivm.tar.gz" | sha256sum --check && \
RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.9.tar.gz -O pg_ivm.tar.gz && \
echo "59e15722939f274650abf637f315dd723c87073496ca77236b044cb205270d8b pg_ivm.tar.gz" | sha256sum --check && \
mkdir pg_ivm-src && cd pg_ivm-src && tar xzf ../pg_ivm.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -983,12 +962,11 @@ FROM build-deps AS pg-partman-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

# should support v17 https://github.com/pgpartman/pg_partman/discussions/693
# last release 5.1.0 Apr 2, 2024
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \
echo "pg_partman doesn't support PG17 yet" && exit 0;; \
esac && \
wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.0.1.tar.gz -O pg_partman.tar.gz && \
echo "75b541733a9659a6c90dbd40fccb904a630a32880a6e3044d0c4c5f4c8a65525 pg_partman.tar.gz" | sha256sum --check && \
RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.1.0.tar.gz -O pg_partman.tar.gz && \
echo "3e3a27d7ff827295d5c55ef72f07a49062d6204b3cb0b9a048645d6db9f3cb9f pg_partman.tar.gz" | sha256sum --check && \
mkdir pg_partman-src && cd pg_partman-src && tar xzf ../pg_partman.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -1125,20 +1103,6 @@ RUN set -e \
&& make -j $(nproc) dist_man_MANS= \
&& make install dist_man_MANS=

#########################################################################################
#
# Compile the Neon-specific `local_proxy` binary
#
#########################################################################################
FROM $REPOSITORY/$IMAGE:$TAG AS local_proxy
ARG BUILD_TAG
ENV BUILD_TAG=$BUILD_TAG

USER nonroot
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
COPY --chown=nonroot . .
RUN mold -run cargo build --locked --profile release-line-debug-size-lto --bin local_proxy

#########################################################################################
#
# Layers "postgres-exporter" and "sql-exporter"
@@ -1277,10 +1241,6 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb
COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer
COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini

# local_proxy and its config
COPY --from=local_proxy --chown=postgres /home/nonroot/target/release-line-debug-size-lto/local_proxy /usr/local/bin/local_proxy
RUN mkdir -p /etc/local_proxy && chown postgres:postgres /etc/local_proxy

# Metrics exporter binaries and configuration files
COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter
COPY --from=sql-exporter /bin/sql_exporter /bin/sql_exporter
@@ -19,10 +19,6 @@ commands:
    user: postgres
    sysvInitAction: respawn
    shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini'
  - name: local_proxy
    user: postgres
    sysvInitAction: respawn
    shell: '/usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
  - name: postgres-exporter
    user: nobody
    sysvInitAction: respawn
@@ -11,13 +11,12 @@ testing = []

[dependencies]
anyhow.workspace = true
camino.workspace = true
chrono.workspace = true
cfg-if.workspace = true
clap.workspace = true
flate2.workspace = true
futures.workspace = true
hyper0 = { workspace = true, features = ["full"] }
hyper = { workspace = true, features = ["full"] }
nix.workspace = true
notify.workspace = true
num_cpus.workspace = true
@@ -402,7 +402,8 @@ fn start_postgres(
) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
// We got all we need, update the state.
let mut state = compute.state.lock().unwrap();
state.set_status(ComputeStatus::Init, &compute.state_changed);
state.status = ComputeStatus::Init;
compute.state_changed.notify_all();

info!(
"running compute with features: {:?}",
@@ -34,7 +34,6 @@ use nix::sys::signal::{kill, Signal};
use remote_storage::{DownloadError, RemotePath};

use crate::checker::create_availability_check_data;
use crate::local_proxy;
use crate::logger::inlinify;
use crate::pg_helpers::*;
use crate::spec::*;
@@ -109,18 +108,6 @@ impl ComputeState {
metrics: ComputeMetrics::default(),
}
}

pub fn set_status(&mut self, status: ComputeStatus, state_changed: &Condvar) {
let prev = self.status;
info!("Changing compute status from {} to {}", prev, status);
self.status = status;
state_changed.notify_all();
}

pub fn set_failed_status(&mut self, err: anyhow::Error, state_changed: &Condvar) {
self.error = Some(format!("{err:?}"));
self.set_status(ComputeStatus::Failed, state_changed);
}
}

impl Default for ComputeState {
@@ -315,12 +302,15 @@ impl ComputeNode {

pub fn set_status(&self, status: ComputeStatus) {
let mut state = self.state.lock().unwrap();
state.set_status(status, &self.state_changed);
state.status = status;
self.state_changed.notify_all();
}

pub fn set_failed_status(&self, err: anyhow::Error) {
let mut state = self.state.lock().unwrap();
state.set_failed_status(err, &self.state_changed);
state.error = Some(format!("{err:?}"));
state.status = ComputeStatus::Failed;
self.state_changed.notify_all();
}

pub fn get_status(&self) -> ComputeStatus {
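Both spellings of a compute status change appear in the hunks above: a `set_status` helper that writes the field and calls `notify_all`, and the inlined `state.status = ...; compute.state_changed.notify_all();` pair. Either way the invariant is the same: every status write made under the mutex must be followed by a condvar notification, otherwise waiters such as the termination handler further down in this compare can sleep past the change. A minimal, self-contained sketch of that Mutex-plus-Condvar pattern, using hypothetical names rather than the real `ComputeNode` types:

```rust
use std::sync::{Arc, Condvar, Mutex};

#[derive(Clone, Copy, PartialEq, Debug)]
enum Status {
    Init,
    Running,
    Terminated,
}

struct Node {
    state: Mutex<Status>,
    state_changed: Condvar,
}

impl Node {
    // Setter: mutate under the lock, then wake every waiter.
    fn set_status(&self, new: Status) {
        let mut st = self.state.lock().unwrap();
        *st = new;
        self.state_changed.notify_all();
    }

    // Waiter: re-check the predicate after every wakeup, since spurious wakeups are allowed.
    fn wait_for(&self, wanted: Status) {
        let mut st = self.state.lock().unwrap();
        while *st != wanted {
            st = self.state_changed.wait(st).unwrap();
        }
    }
}

fn main() {
    let node = Arc::new(Node {
        state: Mutex::new(Status::Init),
        state_changed: Condvar::new(),
    });
    let waiter = {
        let n = Arc::clone(&node);
        std::thread::spawn(move || n.wait_for(Status::Terminated))
    };
    node.set_status(Status::Running);
    node.set_status(Status::Terminated);
    waiter.join().unwrap();
}
```

The waiter loops on the predicate because `Condvar::wait` may wake spuriously, which is exactly what the `while state.status != ComputeStatus::Terminated` loop in `handle_terminate_request` does later in this compare.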
@@ -896,11 +886,6 @@ impl ComputeNode {
// 'Close' connection
drop(client);

if let Some(ref local_proxy) = spec.local_proxy_config {
info!("configuring local_proxy");
local_proxy::configure(local_proxy).context("apply_config local_proxy")?;
}

// Run migrations separately to not hold up cold starts
thread::spawn(move || {
let mut connstr = connstr.clone();
@@ -951,19 +936,6 @@ impl ComputeNode {
});
}

if let Some(ref local_proxy) = spec.local_proxy_config {
info!("configuring local_proxy");

// Spawn a thread to do the configuration,
// so that we don't block the main thread that starts Postgres.
let local_proxy = local_proxy.clone();
let _handle = Some(thread::spawn(move || {
if let Err(err) = local_proxy::configure(&local_proxy) {
error!("error while configuring local_proxy: {err:?}");
}
}));
}

// Write new config
let pgdata_path = Path::new(&self.pgdata);
let postgresql_conf_path = pgdata_path.join("postgresql.conf");
@@ -1051,19 +1023,6 @@ impl ComputeNode {
});
}

if let Some(local_proxy) = &pspec.spec.local_proxy_config {
info!("configuring local_proxy");

// Spawn a thread to do the configuration,
// so that we don't block the main thread that starts Postgres.
let local_proxy = local_proxy.clone();
let _handle = thread::spawn(move || {
if let Err(err) = local_proxy::configure(&local_proxy) {
error!("error while configuring local_proxy: {err:?}");
}
});
}

info!(
"start_compute spec.remote_extensions {:?}",
pspec.spec.remote_extensions
@@ -1484,28 +1443,6 @@ LIMIT 100",
info!("Pageserver config changed");
}
}

// Gather info about installed extensions
pub fn get_installed_extensions(&self) -> Result<()> {
let connstr = self.connstr.clone();

let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.expect("failed to create runtime");
let result = rt
.block_on(crate::installed_extensions::get_installed_extensions(
connstr,
))
.expect("failed to get installed extensions");

info!(
"{}",
serde_json::to_string(&result).expect("failed to serialize extensions list")
);

Ok(())
}
}

pub fn forward_termination_signal() {
@@ -24,7 +24,8 @@ fn configurator_main_loop(compute: &Arc<ComputeNode>) {
// Re-check the status after waking up
if state.status == ComputeStatus::ConfigurationPending {
info!("got configuration request");
state.set_status(ComputeStatus::Configuration, &compute.state_changed);
state.status = ComputeStatus::Configuration;
compute.state_changed.notify_all();
drop(state);

let mut new_status = ComputeStatus::Failed;
@@ -165,32 +165,6 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
}
}

// get the list of installed extensions
// currently only used in python tests
// TODO: call it from cplane
(&Method::GET, "/installed_extensions") => {
info!("serving /installed_extensions GET request");
let status = compute.get_status();
if status != ComputeStatus::Running {
let msg = format!(
"invalid compute status for extensions request: {:?}",
status
);
error!(msg);
return Response::new(Body::from(msg));
}

let connstr = compute.connstr.clone();
let res = crate::installed_extensions::get_installed_extensions(connstr).await;
match res {
Ok(res) => render_json(Body::from(serde_json::to_string(&res).unwrap())),
Err(e) => render_json_error(
&format!("could not get list of installed extensions: {}", e),
StatusCode::INTERNAL_SERVER_ERROR,
),
}
}

// download extension files from remote extension storage on demand
(&Method::POST, route) if route.starts_with("/extension_server/") => {
info!("serving {:?} POST request", route);
@@ -314,7 +288,8 @@ async fn handle_configure_request(
return Err((msg, StatusCode::PRECONDITION_FAILED));
}
state.pspec = Some(parsed_spec);
state.set_status(ComputeStatus::ConfigurationPending, &compute.state_changed);
state.status = ComputeStatus::ConfigurationPending;
compute.state_changed.notify_all();
drop(state);
info!("set new spec and notified waiters");
}
@@ -387,15 +362,15 @@ async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (Str
}
if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
let msg = format!(
"invalid compute status for termination request: {}",
state.status
"invalid compute status for termination request: {:?}",
state.status.clone()
);
return Err((msg, StatusCode::PRECONDITION_FAILED));
}
state.set_status(ComputeStatus::TerminationPending, &compute.state_changed);
state.status = ComputeStatus::TerminationPending;
compute.state_changed.notify_all();
drop(state);
}

forward_termination_signal();
info!("sent signal and notified waiters");
@@ -409,8 +384,7 @@ async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (Str
while state.status != ComputeStatus::Terminated {
state = c.state_changed.wait(state).unwrap();
info!(
"waiting for compute to become {}, current status: {:?}",
ComputeStatus::Terminated,
"waiting for compute to become Terminated, current status: {:?}",
state.status
);
}
@@ -53,20 +53,6 @@ paths:
schema:
$ref: "#/components/schemas/ComputeInsights"

/installed_extensions:
get:
tags:
- Info
summary: Get installed extensions.
description: ""
operationId: getInstalledExtensions
responses:
200:
description: List of installed extensions
content:
application/json:
schema:
$ref: "#/components/schemas/InstalledExtensions"
/info:
get:
tags:
@@ -409,24 +395,6 @@ components:
- configuration
example: running

InstalledExtensions:
type: object
properties:
extensions:
description: Contains list of installed extensions.
type: array
items:
type: object
properties:
extname:
type: string
versions:
type: array
items:
type: string
n_databases:
type: integer

#
# Errors
#
@@ -1,80 +0,0 @@
use compute_api::responses::{InstalledExtension, InstalledExtensions};
use std::collections::HashMap;
use std::collections::HashSet;
use url::Url;

use anyhow::Result;
use postgres::{Client, NoTls};
use tokio::task;

/// We don't reuse get_existing_dbs() just for code clarity
/// and to make database listing query here more explicit.
///
/// Limit the number of databases to 500 to avoid excessive load.
fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
// `pg_database.datconnlimit = -2` means that the database is in the
// invalid state
let databases = client
.query(
"SELECT datname FROM pg_catalog.pg_database
WHERE datallowconn
AND datconnlimit <> - 2
LIMIT 500",
&[],
)?
.iter()
.map(|row| {
let db: String = row.get("datname");
db
})
.collect();

Ok(databases)
}

/// Connect to every database (see list_dbs above) and get the list of installed extensions.
/// Same extension can be installed in multiple databases with different versions,
/// we only keep the highest and lowest version across all databases.
pub async fn get_installed_extensions(connstr: Url) -> Result<InstalledExtensions> {
let mut connstr = connstr.clone();

task::spawn_blocking(move || {
let mut client = Client::connect(connstr.as_str(), NoTls)?;
let databases: Vec<String> = list_dbs(&mut client)?;

let mut extensions_map: HashMap<String, InstalledExtension> = HashMap::new();
for db in databases.iter() {
connstr.set_path(db);
let mut db_client = Client::connect(connstr.as_str(), NoTls)?;
let extensions: Vec<(String, String)> = db_client
.query(
"SELECT extname, extversion FROM pg_catalog.pg_extension;",
&[],
)?
.iter()
.map(|row| (row.get("extname"), row.get("extversion")))
.collect();

for (extname, v) in extensions.iter() {
let version = v.to_string();
extensions_map
.entry(extname.to_string())
.and_modify(|e| {
e.versions.insert(version.clone());
// count the number of databases where the extension is installed
e.n_databases += 1;
})
.or_insert(InstalledExtension {
extname: extname.to_string(),
versions: HashSet::from([version.clone()]),
n_databases: 1,
});
}
}

Ok(InstalledExtensions {
extensions: extensions_map.values().cloned().collect(),
})
})
.await?
}
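`get_installed_extensions` above wraps the synchronous `postgres::Client` in `task::spawn_blocking` so the blocking queries never run on an async executor thread, and its caller in `compute.rs` (shown earlier in this compare) drives the future from a small current-thread runtime with `block_on`. A stripped-down sketch of that bridge, with a hypothetical `SHOW server_version` query standing in for the extension listing:

```rust
use anyhow::Result;
use tokio::task;

// Run a blocking database call on Tokio's blocking pool so the async
// executor threads are never stalled by socket I/O done synchronously.
async fn fetch_server_version(connstr: String) -> Result<String> {
    task::spawn_blocking(move || {
        // `postgres::Client` is the synchronous client; keep it off async workers.
        let mut client = postgres::Client::connect(&connstr, postgres::NoTls)?;
        let row = client.query_one("SHOW server_version", &[])?;
        Ok(row.get::<_, String>(0))
    })
    .await? // propagate a JoinError, then return the inner query result
}

fn main() -> Result<()> {
    // Same shape as the caller in compute.rs: a current-thread runtime built
    // on demand, used only to drive one async call from synchronous code.
    let rt = tokio::runtime::Builder::new_current_thread().enable_all().build()?;
    let version = rt.block_on(fetch_server_version("host=localhost user=postgres".into()))?;
    println!("{version}");
    Ok(())
}
```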
@@ -2,9 +2,6 @@
//! configuration.
#![deny(unsafe_code)]
#![deny(clippy::undocumented_unsafe_blocks)]

extern crate hyper0 as hyper;

pub mod checker;
pub mod config;
pub mod configurator;
@@ -15,8 +12,6 @@ pub mod catalog;
pub mod compute;
pub mod disk_quota;
pub mod extension_server;
pub mod installed_extensions;
pub mod local_proxy;
pub mod lsn_lease;
mod migration;
pub mod monitor;
|
||||
//! Local Proxy is a feature of our BaaS Neon Authorize project.
|
||||
//!
|
||||
//! Local Proxy validates JWTs and manages the pg_session_jwt extension.
|
||||
//! It also maintains a connection pool to postgres.
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use camino::Utf8Path;
|
||||
use compute_api::spec::LocalProxySpec;
|
||||
use nix::sys::signal::Signal;
|
||||
use utils::pid_file::{self, PidFileRead};
|
||||
|
||||
pub fn configure(local_proxy: &LocalProxySpec) -> Result<()> {
|
||||
write_local_proxy_conf("/etc/local_proxy/config.json".as_ref(), local_proxy)?;
|
||||
notify_local_proxy("/etc/local_proxy/pid".as_ref())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Create or completely rewrite configuration file specified by `path`
|
||||
fn write_local_proxy_conf(path: &Utf8Path, local_proxy: &LocalProxySpec) -> Result<()> {
|
||||
let config =
|
||||
serde_json::to_string_pretty(local_proxy).context("serializing LocalProxySpec to json")?;
|
||||
std::fs::write(path, config).with_context(|| format!("writing {path}"))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Notify local proxy about a new config file.
|
||||
fn notify_local_proxy(path: &Utf8Path) -> Result<()> {
|
||||
match pid_file::read(path)? {
|
||||
// if the file doesn't exist, or isn't locked, local_proxy isn't running
|
||||
// and will naturally pick up our config later
|
||||
PidFileRead::NotExist | PidFileRead::NotHeldByAnyProcess(_) => {}
|
||||
PidFileRead::LockedByOtherProcess(pid) => {
|
||||
// From the pid_file docs:
|
||||
//
|
||||
// > 1. The other process might exit at any time, turning the given PID stale.
|
||||
// > 2. There is a small window in which `claim_for_current_process` has already
|
||||
// > locked the file but not yet updates its contents. [`read`] will return
|
||||
// > this variant here, but with the old file contents, i.e., a stale PID.
|
||||
// >
|
||||
// > The kernel is free to recycle PID once it has been `wait(2)`ed upon by
|
||||
// > its creator. Thus, acting upon a stale PID, e.g., by issuing a `kill`
|
||||
// > system call on it, bears the risk of killing an unrelated process.
|
||||
// > This is an inherent limitation of using pidfiles.
|
||||
// > The only race-free solution is to have a supervisor-process with a lifetime
|
||||
// > that exceeds that of all of its child-processes (e.g., `runit`, `supervisord`).
|
||||
//
|
||||
// This is an ok risk as we only send a SIGHUP which likely won't actually
|
||||
// kill the process, only reload config.
|
||||
nix::sys::signal::kill(pid, Signal::SIGHUP).context("sending signal to local_proxy")?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,4 +1,3 @@
use std::collections::HashSet;
use std::fs::File;
use std::path::Path;
use std::str::FromStr;
@@ -190,15 +189,6 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
let mut xact = client.transaction()?;
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;

let mut jwks_roles = HashSet::new();
if let Some(local_proxy) = &spec.local_proxy_config {
for jwks_setting in local_proxy.jwks.iter().flatten() {
for role_name in &jwks_setting.role_names {
jwks_roles.insert(role_name.clone());
}
}
}

// Print a list of existing Postgres roles (only in debug mode)
if span_enabled!(Level::INFO) {
let mut vec = Vec::new();
@@ -318,9 +308,6 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
"CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser",
name.pg_quote()
);
if jwks_roles.contains(name.as_str()) {
query = format!("CREATE ROLE {}", name.pg_quote());
}
info!("running role create query: '{}'", &query);
query.push_str(&role.to_pg_options());
xact.execute(query.as_str(), &[])?;
|
||||
nix.workspace = true
|
||||
once_cell.workspace = true
|
||||
humantime-serde.workspace = true
|
||||
hyper0.workspace = true
|
||||
hyper.workspace = true
|
||||
regex.workspace = true
|
||||
reqwest = { workspace = true, features = ["blocking", "json"] }
|
||||
scopeguard.workspace = true
|
||||
|
||||
@@ -599,7 +599,6 @@ impl Endpoint {
|
||||
remote_extensions,
|
||||
pgbouncer_settings: None,
|
||||
shard_stripe_size: Some(shard_stripe_size),
|
||||
local_proxy_config: None,
|
||||
};
|
||||
let spec_path = self.endpoint_path().join("spec.json");
|
||||
std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;
|
||||
|
||||
@@ -3,7 +3,7 @@ use crate::{
local_env::{LocalEnv, NeonStorageControllerConf},
};
use camino::{Utf8Path, Utf8PathBuf};
use hyper0::Uri;
use hyper::Uri;
use nix::unistd::Pid;
use pageserver_api::{
controller_api::{
@@ -5,7 +5,7 @@
|
||||
Currently we build two main images:
|
||||
|
||||
- [neondatabase/neon](https://hub.docker.com/repository/docker/neondatabase/neon) — image with pre-built `pageserver`, `safekeeper` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).
|
||||
- [neondatabase/compute-node-v16](https://hub.docker.com/repository/docker/neondatabase/compute-node-v16) — compute node image with pre-built Postgres binaries from [neondatabase/postgres](https://github.com/neondatabase/postgres). Similar images exist for v15 and v14. Built from [/compute-node/Dockerfile](/compute/Dockerfile.compute-node).
|
||||
- [neondatabase/compute-node-v16](https://hub.docker.com/repository/docker/neondatabase/compute-node-v16) — compute node image with pre-built Postgres binaries from [neondatabase/postgres](https://github.com/neondatabase/postgres). Similar images exist for v15 and v14.
|
||||
|
||||
And an additional intermediate image:
|
||||
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
//! Structs representing the JSON formats used in the compute_ctl's HTTP API.
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::fmt::Display;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize, Serializer};
|
||||
|
||||
@@ -61,21 +58,6 @@ pub enum ComputeStatus {
|
||||
Terminated,
|
||||
}
|
||||
|
||||
impl Display for ComputeStatus {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
ComputeStatus::Empty => f.write_str("empty"),
|
||||
ComputeStatus::ConfigurationPending => f.write_str("configuration-pending"),
|
||||
ComputeStatus::Init => f.write_str("init"),
|
||||
ComputeStatus::Running => f.write_str("running"),
|
||||
ComputeStatus::Configuration => f.write_str("configuration"),
|
||||
ComputeStatus::Failed => f.write_str("failed"),
|
||||
ComputeStatus::TerminationPending => f.write_str("termination-pending"),
|
||||
ComputeStatus::Terminated => f.write_str("terminated"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn rfc3339_serialize<S>(x: &Option<DateTime<Utc>>, s: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
@@ -156,15 +138,3 @@ pub enum ControlPlaneComputeStatus {
|
||||
// should be able to start with provided spec.
|
||||
Attached,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default, Serialize)]
|
||||
pub struct InstalledExtension {
|
||||
pub extname: String,
|
||||
pub versions: HashSet<String>,
|
||||
pub n_databases: u32, // Number of databases using this extension
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default, Serialize)]
|
||||
pub struct InstalledExtensions {
|
||||
pub extensions: Vec<InstalledExtension>,
|
||||
}
|
||||
|
||||
@@ -106,10 +106,6 @@ pub struct ComputeSpec {
|
||||
// Stripe size for pageserver sharding, in pages
|
||||
#[serde(default)]
|
||||
pub shard_stripe_size: Option<usize>,
|
||||
|
||||
/// Local Proxy configuration used for JWT authentication
|
||||
#[serde(default)]
|
||||
pub local_proxy_config: Option<LocalProxySpec>,
|
||||
}
|
||||
|
||||
/// Feature flag to signal `compute_ctl` to enable certain experimental functionality.
|
||||
@@ -282,13 +278,11 @@ pub struct GenericOption {
/// declare a `trait` on it.
pub type GenericOptions = Option<Vec<GenericOption>>;

/// Configures the local_proxy application with the relevant JWKS and roles it should
/// Configures the local-proxy application with the relevant JWKS and roles it should
/// use for authorizing connect requests using JWT.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct LocalProxySpec {
#[serde(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub jwks: Option<Vec<JwksSettings>>,
pub jwks: Vec<JwksSettings>,
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
|
||||
@@ -104,7 +104,8 @@ pub struct ConfigToml {
|
||||
pub image_compression: ImageCompressionAlgorithm,
|
||||
pub ephemeral_bytes_per_memory_kb: usize,
|
||||
pub l0_flush: Option<crate::models::L0FlushConfig>,
|
||||
pub virtual_file_io_mode: Option<crate::models::virtual_file::IoMode>,
|
||||
pub virtual_file_direct_io: crate::models::virtual_file::DirectIoMode,
|
||||
pub io_buffer_alignment: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
@@ -295,14 +296,7 @@ pub mod defaults {

pub const DEFAULT_INGEST_BATCH_SIZE: u64 = 100;

/// Soft limit for the maximum size of a vectored read.
///
/// This is determined by the largest NeonWalRecord that can exist (minus dbdir and reldir keys,
/// which are bounded by the blob io limits only). As of this writing, that is a `NeonWalRecord::ClogSetCommitted` record
/// with 32k XIDs. That's the max number of XIDs on a single CLOG page. The size of such a record
/// is `sizeof(TransactionId) * 32768` plus some fixed overhead (the `timestamp`, the `Vec` length, and whatever extra serde serialization adds).
/// That is, slightly above 128 kB.
pub const DEFAULT_MAX_VECTORED_READ_BYTES: usize = 130 * 1024; // 130 KiB
pub const DEFAULT_MAX_VECTORED_READ_BYTES: usize = 128 * 1024; // 128 KiB
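For reference, the arithmetic behind the doc comment (assuming the 4-byte `TransactionId` used by Postgres): 32768 XIDs × 4 bytes = 131072 bytes = exactly 128 KiB, with the "slightly above" coming from the fixed overhead. A throwaway sketch of that back-of-the-envelope check:

// Sketch only; the 4-byte TransactionId size matches Postgres, the overhead is not modeled.
fn main() {
    let xids_per_clog_page: usize = 32 * 1024; // 32768
    let xid_size: usize = 4; // sizeof(TransactionId) in Postgres
    let payload = xids_per_clog_page * xid_size; // 131072 bytes
    assert_eq!(payload, 128 * 1024);
    println!("largest ClogSetCommitted payload ≈ {payload} bytes, plus fixed overhead");
}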
|
||||
|
||||
pub const DEFAULT_IMAGE_COMPRESSION: ImageCompressionAlgorithm =
|
||||
ImageCompressionAlgorithm::Zstd { level: Some(1) };
|
||||
@@ -387,7 +381,10 @@ impl Default for ConfigToml {
|
||||
image_compression: (DEFAULT_IMAGE_COMPRESSION),
|
||||
ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
|
||||
l0_flush: None,
|
||||
virtual_file_io_mode: None,
|
||||
virtual_file_direct_io: crate::models::virtual_file::DirectIoMode::default(),
|
||||
|
||||
io_buffer_alignment: DEFAULT_IO_BUFFER_ALIGNMENT,
|
||||
|
||||
tenant_config: TenantConfigToml::default(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -748,16 +748,6 @@ impl Key {
|
||||
self.field1 == 0x00 && self.field4 != 0 && self.field6 != 0xffffffff
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn is_rel_dir_key(&self) -> bool {
|
||||
self.field1 == 0x00
|
||||
&& self.field2 != 0
|
||||
&& self.field3 != 0
|
||||
&& self.field4 == 0
|
||||
&& self.field5 == 0
|
||||
&& self.field6 == 1
|
||||
}
|
||||
|
||||
/// Guaranteed to return `Ok()` if [`Self::is_rel_block_key`] returns `true` for `key`.
|
||||
#[inline(always)]
|
||||
pub fn to_rel_block(self) -> anyhow::Result<(RelTag, BlockNumber)> {
|
||||
|
||||
@@ -972,6 +972,8 @@ pub struct TopTenantShardsResponse {
|
||||
}
|
||||
|
||||
pub mod virtual_file {
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(
|
||||
Copy,
|
||||
Clone,
|
||||
@@ -992,45 +994,50 @@ pub mod virtual_file {
|
||||
}
|
||||
|
||||
/// Direct IO modes for a pageserver.
|
||||
#[derive(
|
||||
Copy,
|
||||
Clone,
|
||||
PartialEq,
|
||||
Eq,
|
||||
Hash,
|
||||
strum_macros::EnumString,
|
||||
strum_macros::Display,
|
||||
serde_with::DeserializeFromStr,
|
||||
serde_with::SerializeDisplay,
|
||||
Debug,
|
||||
)]
|
||||
#[strum(serialize_all = "kebab-case")]
|
||||
#[repr(u8)]
|
||||
pub enum IoMode {
|
||||
/// Uses buffered IO.
|
||||
Buffered,
|
||||
/// Uses direct IO; errors out if the operation fails.
|
||||
#[cfg(target_os = "linux")]
|
||||
Direct,
|
||||
#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)]
|
||||
#[serde(tag = "mode", rename_all = "kebab-case", deny_unknown_fields)]
|
||||
pub enum DirectIoMode {
|
||||
/// Direct IO disabled (uses usual buffered IO).
|
||||
#[default]
|
||||
Disabled,
|
||||
/// Direct IO disabled, but alignment checks and latency simulations are performed as if it were enabled.
|
||||
Evaluate {
|
||||
/// Alignment check level
|
||||
alignment_check: DirectIoAlignmentCheckLevel,
|
||||
/// Latency padded for performance simulation.
|
||||
latency_padding: DirectIoLatencyPadding,
|
||||
},
|
||||
/// Direct IO enabled.
|
||||
Enabled {
|
||||
/// Actions to perform on alignment error.
|
||||
on_alignment_error: DirectIoOnAlignmentErrorAction,
|
||||
},
|
||||
}
|
||||
|
||||
impl IoMode {
|
||||
pub const fn preferred() -> Self {
|
||||
Self::Buffered
|
||||
}
|
||||
#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub enum DirectIoAlignmentCheckLevel {
|
||||
#[default]
|
||||
Error,
|
||||
Log,
|
||||
None,
|
||||
}
|
||||
|
||||
impl TryFrom<u8> for IoMode {
|
||||
type Error = u8;
|
||||
#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub enum DirectIoOnAlignmentErrorAction {
|
||||
Error,
|
||||
#[default]
|
||||
FallbackToBuffered,
|
||||
}
|
||||
|
||||
fn try_from(value: u8) -> Result<Self, Self::Error> {
|
||||
Ok(match value {
|
||||
v if v == (IoMode::Buffered as u8) => IoMode::Buffered,
|
||||
#[cfg(target_os = "linux")]
|
||||
v if v == (IoMode::Direct as u8) => IoMode::Direct,
|
||||
x => return Err(x),
|
||||
})
|
||||
}
|
||||
#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)]
|
||||
#[serde(tag = "type", rename_all = "kebab-case")]
|
||||
pub enum DirectIoLatencyPadding {
|
||||
/// Pad virtual file operations with IO to a fake file.
|
||||
FakeFileRW { path: PathBuf },
|
||||
#[default]
|
||||
None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ aws-sdk-s3.workspace = true
|
||||
bytes.workspace = true
|
||||
camino = { workspace = true, features = ["serde1"] }
|
||||
humantime-serde.workspace = true
|
||||
hyper0 = { workspace = true, features = ["stream"] }
|
||||
hyper = { workspace = true, features = ["stream"] }
|
||||
futures.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
|
||||
@@ -14,7 +14,7 @@ use std::time::SystemTime;
|
||||
|
||||
use super::REMOTE_STORAGE_PREFIX_SEPARATOR;
|
||||
use anyhow::Result;
|
||||
use azure_core::request_options::{IfMatchCondition, MaxResults, Metadata, Range};
|
||||
use azure_core::request_options::{MaxResults, Metadata, Range};
|
||||
use azure_core::{Continuable, RetryOptions};
|
||||
use azure_identity::DefaultAzureCredential;
|
||||
use azure_storage::StorageCredentials;
|
||||
@@ -33,10 +33,10 @@ use tracing::debug;
|
||||
use utils::backoff;
|
||||
|
||||
use crate::metrics::{start_measuring_requests, AttemptOutcome, RequestKind};
|
||||
use crate::ListingObject;
|
||||
use crate::{
|
||||
config::AzureConfig, error::Cancelled, ConcurrencyLimiter, Download, DownloadError,
|
||||
DownloadOpts, Listing, ListingMode, ListingObject, RemotePath, RemoteStorage, StorageMetadata,
|
||||
TimeTravelError, TimeoutOrCancel,
|
||||
config::AzureConfig, error::Cancelled, ConcurrencyLimiter, Download, DownloadError, Listing,
|
||||
ListingMode, RemotePath, RemoteStorage, StorageMetadata, TimeTravelError, TimeoutOrCancel,
|
||||
};
|
||||
|
||||
pub struct AzureBlobStorage {
|
||||
@@ -259,7 +259,6 @@ fn to_download_error(error: azure_core::Error) -> DownloadError {
|
||||
if let Some(http_err) = error.as_http_error() {
|
||||
match http_err.status() {
|
||||
StatusCode::NotFound => DownloadError::NotFound,
|
||||
StatusCode::NotModified => DownloadError::Unmodified,
|
||||
StatusCode::BadRequest => DownloadError::BadInput(anyhow::Error::new(error)),
|
||||
_ => DownloadError::Other(anyhow::Error::new(error)),
|
||||
}
|
||||
@@ -485,23 +484,32 @@ impl RemoteStorage for AzureBlobStorage {
|
||||
async fn download(
|
||||
&self,
|
||||
from: &RemotePath,
|
||||
opts: &DownloadOpts,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<Download, DownloadError> {
|
||||
let blob_client = self.client.blob_client(self.relative_path_to_name(from));
|
||||
|
||||
let builder = blob_client.get();
|
||||
|
||||
self.download_for_builder(builder, cancel).await
|
||||
}
|
||||
|
||||
async fn download_byte_range(
|
||||
&self,
|
||||
from: &RemotePath,
|
||||
start_inclusive: u64,
|
||||
end_exclusive: Option<u64>,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<Download, DownloadError> {
|
||||
let blob_client = self.client.blob_client(self.relative_path_to_name(from));
|
||||
|
||||
let mut builder = blob_client.get();
|
||||
|
||||
if let Some(ref etag) = opts.etag {
|
||||
builder = builder.if_match(IfMatchCondition::NotMatch(etag.to_string()))
|
||||
}
|
||||
|
||||
if let Some((start, end)) = opts.byte_range() {
|
||||
builder = builder.range(match end {
|
||||
Some(end) => Range::Range(start..end),
|
||||
None => Range::RangeFrom(start..),
|
||||
});
|
||||
}
|
||||
let range: Range = if let Some(end_exclusive) = end_exclusive {
|
||||
(start_inclusive..end_exclusive).into()
|
||||
} else {
|
||||
(start_inclusive..).into()
|
||||
};
|
||||
builder = builder.range(range);
|
||||
|
||||
self.download_for_builder(builder, cancel).await
|
||||
}
|
||||
|
||||
@@ -5,8 +5,6 @@ pub enum DownloadError {
|
||||
BadInput(anyhow::Error),
|
||||
/// The file was not found in the remote storage.
|
||||
NotFound,
|
||||
/// The caller provided an ETag, and the file was not modified.
|
||||
Unmodified,
|
||||
/// A cancellation token aborted the download, typically during
|
||||
/// tenant detach or process shutdown.
|
||||
Cancelled,
|
||||
@@ -26,7 +24,6 @@ impl std::fmt::Display for DownloadError {
|
||||
write!(f, "Failed to download a remote file due to user input: {e}")
|
||||
}
|
||||
DownloadError::NotFound => write!(f, "No file found for the remote object id given"),
|
||||
DownloadError::Unmodified => write!(f, "File was not modified"),
|
||||
DownloadError::Cancelled => write!(f, "Cancelled, shutting down"),
|
||||
DownloadError::Timeout => write!(f, "timeout"),
|
||||
DownloadError::Other(e) => write!(f, "Failed to download a remote file: {e:?}"),
|
||||
@@ -41,7 +38,7 @@ impl DownloadError {
|
||||
pub fn is_permanent(&self) -> bool {
|
||||
use DownloadError::*;
|
||||
match self {
|
||||
BadInput(_) | NotFound | Unmodified | Cancelled => true,
|
||||
BadInput(_) | NotFound | Cancelled => true,
|
||||
Timeout | Other(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,8 +19,7 @@ mod simulate_failures;
|
||||
mod support;
|
||||
|
||||
use std::{
|
||||
collections::HashMap, fmt::Debug, num::NonZeroU32, ops::Bound, pin::Pin, sync::Arc,
|
||||
time::SystemTime,
|
||||
collections::HashMap, fmt::Debug, num::NonZeroU32, pin::Pin, sync::Arc, time::SystemTime,
|
||||
};
|
||||
|
||||
use anyhow::Context;
|
||||
@@ -162,63 +161,6 @@ pub struct Listing {
|
||||
pub keys: Vec<ListingObject>,
|
||||
}
|
||||
|
||||
/// Options for downloads. The default value is a plain GET.
pub struct DownloadOpts {
/// If given, returns [`DownloadError::Unmodified`] if the object still has
/// the same ETag (using If-None-Match).
pub etag: Option<Etag>,
/// The start of the byte range to download, or unbounded.
pub byte_start: Bound<u64>,
/// The end of the byte range to download, or unbounded. Must be after the
/// start bound.
pub byte_end: Bound<u64>,
}

impl Default for DownloadOpts {
fn default() -> Self {
Self {
etag: Default::default(),
byte_start: Bound::Unbounded,
byte_end: Bound::Unbounded,
}
}
}

impl DownloadOpts {
/// Returns the byte range with inclusive start and exclusive end, or None
/// if unbounded.
pub fn byte_range(&self) -> Option<(u64, Option<u64>)> {
if self.byte_start == Bound::Unbounded && self.byte_end == Bound::Unbounded {
return None;
}
let start = match self.byte_start {
Bound::Excluded(i) => i + 1,
Bound::Included(i) => i,
Bound::Unbounded => 0,
};
let end = match self.byte_end {
Bound::Excluded(i) => Some(i),
Bound::Included(i) => Some(i + 1),
Bound::Unbounded => None,
};
if let Some(end) = end {
assert!(start < end, "range end {end} at or before start {start}");
}
Some((start, end))
}

/// Returns the byte range as an RFC 2616 Range header value with inclusive
/// bounds, or None if unbounded.
pub fn byte_range_header(&self) -> Option<String> {
self.byte_range()
.map(|(start, end)| (start, end.map(|end| end - 1))) // make end inclusive
.map(|(start, end)| match end {
Some(end) => format!("bytes={start}-{end}"),
None => format!("bytes={start}-"),
})
}
}
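As a quick illustration of the helpers above (a sketch assuming `DownloadOpts` as defined in this hunk, written in the style of the `download_opts_byte_range` test further down): a half-open request for bytes [4, 10) maps to an inclusive-end HTTP Range header.

#[test]
fn download_opts_byte_range_example() {
    use std::ops::Bound;
    // Half-open [4, 10) in DownloadOpts terms...
    let opts = DownloadOpts {
        byte_start: Bound::Included(4),
        byte_end: Bound::Excluded(10),
        ..Default::default()
    };
    // ...yields an inclusive-start/exclusive-end pair...
    assert_eq!(opts.byte_range(), Some((4, Some(10))));
    // ...and an RFC 2616 header value with an inclusive end.
    assert_eq!(opts.byte_range_header().as_deref(), Some("bytes=4-9"));
}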
|
||||
|
||||
/// Storage (potentially remote) API to manage its state.
|
||||
/// This storage tries to be unaware of any layered repository context,
|
||||
/// providing basic CRUD operations for storage files.
|
||||
@@ -303,7 +245,21 @@ pub trait RemoteStorage: Send + Sync + 'static {
|
||||
async fn download(
|
||||
&self,
|
||||
from: &RemotePath,
|
||||
opts: &DownloadOpts,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<Download, DownloadError>;
|
||||
|
||||
/// Streams a given byte range of the remote storage entry contents.
|
||||
///
|
||||
/// The returned download stream will obey initial timeout and cancellation signal by erroring
|
||||
/// on whichever happens first. Only one of the reasons will fail the stream, which is usually
|
||||
/// enough for `tokio::io::copy_buf` usage. If needed the error can be filtered out.
|
||||
///
|
||||
/// Returns the metadata, if any was stored with the file previously.
|
||||
async fn download_byte_range(
|
||||
&self,
|
||||
from: &RemotePath,
|
||||
start_inclusive: u64,
|
||||
end_exclusive: Option<u64>,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<Download, DownloadError>;
|
||||
|
||||
@@ -445,18 +401,43 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
|
||||
}
|
||||
}
|
||||
|
||||
/// See [`RemoteStorage::download`]
|
||||
pub async fn download(
|
||||
&self,
|
||||
from: &RemotePath,
|
||||
opts: &DownloadOpts,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<Download, DownloadError> {
|
||||
match self {
|
||||
Self::LocalFs(s) => s.download(from, opts, cancel).await,
|
||||
Self::AwsS3(s) => s.download(from, opts, cancel).await,
|
||||
Self::AzureBlob(s) => s.download(from, opts, cancel).await,
|
||||
Self::Unreliable(s) => s.download(from, opts, cancel).await,
|
||||
Self::LocalFs(s) => s.download(from, cancel).await,
|
||||
Self::AwsS3(s) => s.download(from, cancel).await,
|
||||
Self::AzureBlob(s) => s.download(from, cancel).await,
|
||||
Self::Unreliable(s) => s.download(from, cancel).await,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn download_byte_range(
|
||||
&self,
|
||||
from: &RemotePath,
|
||||
start_inclusive: u64,
|
||||
end_exclusive: Option<u64>,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<Download, DownloadError> {
|
||||
match self {
|
||||
Self::LocalFs(s) => {
|
||||
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
|
||||
.await
|
||||
}
|
||||
Self::AwsS3(s) => {
|
||||
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
|
||||
.await
|
||||
}
|
||||
Self::AzureBlob(s) => {
|
||||
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
|
||||
.await
|
||||
}
|
||||
Self::Unreliable(s) => {
|
||||
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
|
||||
.await
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -581,6 +562,20 @@ impl GenericRemoteStorage {
|
||||
})
|
||||
}
|
||||
|
||||
/// Downloads the storage object into the `to_path` provided.
/// `byte_range` can be specified to download only a part of the file, if needed.
|
||||
pub async fn download_storage_object(
|
||||
&self,
|
||||
byte_range: Option<(u64, Option<u64>)>,
|
||||
from: &RemotePath,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<Download, DownloadError> {
|
||||
match byte_range {
|
||||
Some((start, end)) => self.download_byte_range(from, start, end, cancel).await,
|
||||
None => self.download(from, cancel).await,
|
||||
}
|
||||
}
|
||||
|
||||
/// The name of the bucket/container/etc.
|
||||
pub fn bucket_name(&self) -> Option<&str> {
|
||||
match self {
|
||||
@@ -654,76 +649,6 @@ impl ConcurrencyLimiter {
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
/// DownloadOpts::byte_range() should generate (inclusive, exclusive) ranges
|
||||
/// with optional end bound, or None when unbounded.
|
||||
#[test]
|
||||
fn download_opts_byte_range() {
|
||||
// Consider using test_case or a similar table-driven test framework.
|
||||
let cases = [
|
||||
// (byte_start, byte_end, expected)
|
||||
(Bound::Unbounded, Bound::Unbounded, None),
|
||||
(Bound::Unbounded, Bound::Included(7), Some((0, Some(8)))),
|
||||
(Bound::Unbounded, Bound::Excluded(7), Some((0, Some(7)))),
|
||||
(Bound::Included(3), Bound::Unbounded, Some((3, None))),
|
||||
(Bound::Included(3), Bound::Included(7), Some((3, Some(8)))),
|
||||
(Bound::Included(3), Bound::Excluded(7), Some((3, Some(7)))),
|
||||
(Bound::Excluded(3), Bound::Unbounded, Some((4, None))),
|
||||
(Bound::Excluded(3), Bound::Included(7), Some((4, Some(8)))),
|
||||
(Bound::Excluded(3), Bound::Excluded(7), Some((4, Some(7)))),
|
||||
// 1-sized ranges are fine; 0-sized ranges aren't and will panic (see the separate test).
|
||||
(Bound::Included(3), Bound::Included(3), Some((3, Some(4)))),
|
||||
(Bound::Included(3), Bound::Excluded(4), Some((3, Some(4)))),
|
||||
];
|
||||
|
||||
for (byte_start, byte_end, expect) in cases {
|
||||
let opts = DownloadOpts {
|
||||
byte_start,
|
||||
byte_end,
|
||||
..Default::default()
|
||||
};
|
||||
let result = opts.byte_range();
|
||||
assert_eq!(
|
||||
result, expect,
|
||||
"byte_start={byte_start:?} byte_end={byte_end:?}"
|
||||
);
|
||||
|
||||
// Check generated HTTP header, which uses an inclusive range.
|
||||
let expect_header = expect.map(|(start, end)| match end {
|
||||
Some(end) => format!("bytes={start}-{}", end - 1), // inclusive end
|
||||
None => format!("bytes={start}-"),
|
||||
});
|
||||
assert_eq!(
|
||||
opts.byte_range_header(),
|
||||
expect_header,
|
||||
"byte_start={byte_start:?} byte_end={byte_end:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// DownloadOpts::byte_range() zero-sized byte range should panic.
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn download_opts_byte_range_zero() {
|
||||
DownloadOpts {
|
||||
byte_start: Bound::Included(3),
|
||||
byte_end: Bound::Excluded(3),
|
||||
..Default::default()
|
||||
}
|
||||
.byte_range();
|
||||
}
|
||||
|
||||
/// DownloadOpts::byte_range() negative byte range should panic.
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn download_opts_byte_range_negative() {
|
||||
DownloadOpts {
|
||||
byte_start: Bound::Included(3),
|
||||
byte_end: Bound::Included(2),
|
||||
..Default::default()
|
||||
}
|
||||
.byte_range();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_object_name() {
|
||||
let k = RemotePath::new(Utf8Path::new("a/b/c")).unwrap();
|
||||
|
||||
@@ -23,8 +23,8 @@ use tokio_util::{io::ReaderStream, sync::CancellationToken};
|
||||
use utils::crashsafe::path_with_suffix_extension;
|
||||
|
||||
use crate::{
|
||||
Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject, RemotePath,
|
||||
TimeTravelError, TimeoutOrCancel, REMOTE_STORAGE_PREFIX_SEPARATOR,
|
||||
Download, DownloadError, Listing, ListingMode, ListingObject, RemotePath, TimeTravelError,
|
||||
TimeoutOrCancel, REMOTE_STORAGE_PREFIX_SEPARATOR,
|
||||
};
|
||||
|
||||
use super::{RemoteStorage, StorageMetadata};
|
||||
@@ -494,41 +494,22 @@ impl RemoteStorage for LocalFs {
|
||||
async fn download(
|
||||
&self,
|
||||
from: &RemotePath,
|
||||
opts: &DownloadOpts,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<Download, DownloadError> {
|
||||
let target_path = from.with_base(&self.storage_root);
|
||||
|
||||
let file_metadata = file_metadata(&target_path).await?;
|
||||
let etag = mock_etag(&file_metadata);
|
||||
|
||||
if opts.etag.as_ref() == Some(&etag) {
|
||||
return Err(DownloadError::Unmodified);
|
||||
}
|
||||
|
||||
let mut file = fs::OpenOptions::new()
|
||||
.read(true)
|
||||
.open(&target_path)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("Failed to open source file {target_path:?} to use in the download")
|
||||
})
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
let mut take = file_metadata.len();
|
||||
if let Some((start, end)) = opts.byte_range() {
|
||||
if start > 0 {
|
||||
file.seek(io::SeekFrom::Start(start))
|
||||
.await
|
||||
.context("Failed to seek to the range start in a local storage file")
|
||||
.map_err(DownloadError::Other)?;
|
||||
}
|
||||
if let Some(end) = end {
|
||||
take = end - start;
|
||||
}
|
||||
}
|
||||
|
||||
let source = ReaderStream::new(file.take(take));
|
||||
let source = ReaderStream::new(
|
||||
fs::OpenOptions::new()
|
||||
.read(true)
|
||||
.open(&target_path)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("Failed to open source file {target_path:?} to use in the download")
|
||||
})
|
||||
.map_err(DownloadError::Other)?,
|
||||
);
|
||||
|
||||
let metadata = self
|
||||
.read_storage_metadata(&target_path)
|
||||
@@ -538,6 +519,69 @@ impl RemoteStorage for LocalFs {
|
||||
let cancel_or_timeout = crate::support::cancel_or_timeout(self.timeout, cancel.clone());
|
||||
let source = crate::support::DownloadStream::new(cancel_or_timeout, source);
|
||||
|
||||
let etag = mock_etag(&file_metadata);
|
||||
Ok(Download {
|
||||
metadata,
|
||||
last_modified: file_metadata
|
||||
.modified()
|
||||
.map_err(|e| DownloadError::Other(anyhow::anyhow!(e).context("Reading mtime")))?,
|
||||
etag,
|
||||
download_stream: Box::pin(source),
|
||||
})
|
||||
}
|
||||
|
||||
async fn download_byte_range(
|
||||
&self,
|
||||
from: &RemotePath,
|
||||
start_inclusive: u64,
|
||||
end_exclusive: Option<u64>,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<Download, DownloadError> {
|
||||
if let Some(end_exclusive) = end_exclusive {
|
||||
if end_exclusive <= start_inclusive {
|
||||
return Err(DownloadError::Other(anyhow::anyhow!("Invalid range, start ({start_inclusive}) is not less than end_exclusive ({end_exclusive:?})")));
|
||||
};
|
||||
if start_inclusive == end_exclusive.saturating_sub(1) {
|
||||
return Err(DownloadError::Other(anyhow::anyhow!("Invalid range, start ({start_inclusive}) and end_exclusive ({end_exclusive:?}) difference is zero bytes")));
|
||||
}
|
||||
}
|
||||
|
||||
let target_path = from.with_base(&self.storage_root);
|
||||
let file_metadata = file_metadata(&target_path).await?;
|
||||
let mut source = tokio::fs::OpenOptions::new()
|
||||
.read(true)
|
||||
.open(&target_path)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("Failed to open source file {target_path:?} to use in the download")
|
||||
})
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
let len = source
|
||||
.metadata()
|
||||
.await
|
||||
.context("query file length")
|
||||
.map_err(DownloadError::Other)?
|
||||
.len();
|
||||
|
||||
source
|
||||
.seek(io::SeekFrom::Start(start_inclusive))
|
||||
.await
|
||||
.context("Failed to seek to the range start in a local storage file")
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
let metadata = self
|
||||
.read_storage_metadata(&target_path)
|
||||
.await
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
let source = source.take(end_exclusive.unwrap_or(len) - start_inclusive);
|
||||
let source = ReaderStream::new(source);
|
||||
|
||||
let cancel_or_timeout = crate::support::cancel_or_timeout(self.timeout, cancel.clone());
|
||||
let source = crate::support::DownloadStream::new(cancel_or_timeout, source);
|
||||
|
||||
let etag = mock_etag(&file_metadata);
|
||||
Ok(Download {
|
||||
metadata,
|
||||
last_modified: file_metadata
|
||||
@@ -639,7 +683,7 @@ mod fs_tests {
|
||||
use super::*;
|
||||
|
||||
use camino_tempfile::tempdir;
|
||||
use std::{collections::HashMap, io::Write, ops::Bound};
|
||||
use std::{collections::HashMap, io::Write};
|
||||
|
||||
async fn read_and_check_metadata(
|
||||
storage: &LocalFs,
|
||||
@@ -648,7 +692,7 @@ mod fs_tests {
|
||||
) -> anyhow::Result<String> {
|
||||
let cancel = CancellationToken::new();
|
||||
let download = storage
|
||||
.download(remote_storage_path, &DownloadOpts::default(), &cancel)
|
||||
.download(remote_storage_path, &cancel)
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!("Download failed: {e}"))?;
|
||||
ensure!(
|
||||
@@ -729,8 +773,8 @@ mod fs_tests {
|
||||
"We should upload and download the same contents"
|
||||
);
|
||||
|
||||
let non_existing_path = RemotePath::new(Utf8Path::new("somewhere/else"))?;
|
||||
match storage.download(&non_existing_path, &DownloadOpts::default(), &cancel).await {
|
||||
let non_existing_path = "somewhere/else";
|
||||
match storage.download(&RemotePath::new(Utf8Path::new(non_existing_path))?, &cancel).await {
|
||||
Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys
|
||||
other => panic!("Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"),
|
||||
}
|
||||
@@ -755,12 +799,10 @@ mod fs_tests {
|
||||
let (first_part_local, second_part_local) = uploaded_bytes.split_at(3);
|
||||
|
||||
let first_part_download = storage
|
||||
.download(
|
||||
.download_byte_range(
|
||||
&upload_target,
|
||||
&DownloadOpts {
|
||||
byte_end: Bound::Excluded(first_part_local.len() as u64),
|
||||
..Default::default()
|
||||
},
|
||||
0,
|
||||
Some(first_part_local.len() as u64),
|
||||
&cancel,
|
||||
)
|
||||
.await?;
|
||||
@@ -776,15 +818,10 @@ mod fs_tests {
|
||||
);
|
||||
|
||||
let second_part_download = storage
|
||||
.download(
|
||||
.download_byte_range(
|
||||
&upload_target,
|
||||
&DownloadOpts {
|
||||
byte_start: Bound::Included(first_part_local.len() as u64),
|
||||
byte_end: Bound::Excluded(
|
||||
(first_part_local.len() + second_part_local.len()) as u64,
|
||||
),
|
||||
..Default::default()
|
||||
},
|
||||
first_part_local.len() as u64,
|
||||
Some((first_part_local.len() + second_part_local.len()) as u64),
|
||||
&cancel,
|
||||
)
|
||||
.await?;
|
||||
@@ -800,14 +837,7 @@ mod fs_tests {
|
||||
);
|
||||
|
||||
let suffix_bytes = storage
|
||||
.download(
|
||||
&upload_target,
|
||||
&DownloadOpts {
|
||||
byte_start: Bound::Included(13),
|
||||
..Default::default()
|
||||
},
|
||||
&cancel,
|
||||
)
|
||||
.download_byte_range(&upload_target, 13, None, &cancel)
|
||||
.await?
|
||||
.download_stream;
|
||||
let suffix_bytes = aggregate(suffix_bytes).await?;
|
||||
@@ -815,7 +845,7 @@ mod fs_tests {
|
||||
assert_eq!(upload_name, suffix);
|
||||
|
||||
let all_bytes = storage
|
||||
.download(&upload_target, &DownloadOpts::default(), &cancel)
|
||||
.download_byte_range(&upload_target, 0, None, &cancel)
|
||||
.await?
|
||||
.download_stream;
|
||||
let all_bytes = aggregate(all_bytes).await?;
|
||||
@@ -826,26 +856,48 @@ mod fs_tests {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[should_panic(expected = "at or before start")]
|
||||
async fn download_file_range_negative() {
|
||||
let (storage, cancel) = create_storage().unwrap();
|
||||
async fn download_file_range_negative() -> anyhow::Result<()> {
|
||||
let (storage, cancel) = create_storage()?;
|
||||
let upload_name = "upload_1";
|
||||
let upload_target = upload_dummy_file(&storage, upload_name, None, &cancel)
|
||||
.await
|
||||
.unwrap();
|
||||
let upload_target = upload_dummy_file(&storage, upload_name, None, &cancel).await?;
|
||||
|
||||
storage
|
||||
.download(
|
||||
let start = 1_000_000_000;
|
||||
let end = start + 1;
|
||||
match storage
|
||||
.download_byte_range(
|
||||
&upload_target,
|
||||
&DownloadOpts {
|
||||
byte_start: Bound::Included(10),
|
||||
byte_end: Bound::Excluded(10),
|
||||
..Default::default()
|
||||
},
|
||||
start,
|
||||
Some(end), // exclusive end
|
||||
&cancel,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
{
|
||||
Ok(_) => panic!("Should not allow downloading wrong ranges"),
|
||||
Err(e) => {
|
||||
let error_string = e.to_string();
|
||||
assert!(error_string.contains("zero bytes"));
|
||||
assert!(error_string.contains(&start.to_string()));
|
||||
assert!(error_string.contains(&end.to_string()));
|
||||
}
|
||||
}
|
||||
|
||||
let start = 10000;
|
||||
let end = 234;
|
||||
assert!(start > end, "Should test an incorrect range");
|
||||
match storage
|
||||
.download_byte_range(&upload_target, start, Some(end), &cancel)
|
||||
.await
|
||||
{
|
||||
Ok(_) => panic!("Should not allow downloading wrong ranges"),
|
||||
Err(e) => {
|
||||
let error_string = e.to_string();
|
||||
assert!(error_string.contains("Invalid range"));
|
||||
assert!(error_string.contains(&start.to_string()));
|
||||
assert!(error_string.contains(&end.to_string()));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -888,12 +940,10 @@ mod fs_tests {
|
||||
let (first_part_local, _) = uploaded_bytes.split_at(3);
|
||||
|
||||
let partial_download_with_metadata = storage
|
||||
.download(
|
||||
.download_byte_range(
|
||||
&upload_target,
|
||||
&DownloadOpts {
|
||||
byte_end: Bound::Excluded(first_part_local.len() as u64),
|
||||
..Default::default()
|
||||
},
|
||||
0,
|
||||
Some(first_part_local.len() as u64),
|
||||
&cancel,
|
||||
)
|
||||
.await?;
|
||||
@@ -1051,13 +1101,7 @@ mod fs_tests {
|
||||
storage.upload(body, len, &path, None, &cancel).await?;
|
||||
}
|
||||
|
||||
let read = aggregate(
|
||||
storage
|
||||
.download(&path, &DownloadOpts::default(), &cancel)
|
||||
.await?
|
||||
.download_stream,
|
||||
)
|
||||
.await?;
|
||||
let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?;
|
||||
assert_eq!(body, read);
|
||||
|
||||
let shorter = Bytes::from_static(b"shorter body");
|
||||
@@ -1068,13 +1112,7 @@ mod fs_tests {
|
||||
storage.upload(body, len, &path, None, &cancel).await?;
|
||||
}
|
||||
|
||||
let read = aggregate(
|
||||
storage
|
||||
.download(&path, &DownloadOpts::default(), &cancel)
|
||||
.await?
|
||||
.download_stream,
|
||||
)
|
||||
.await?;
|
||||
let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?;
|
||||
assert_eq!(shorter, read);
|
||||
Ok(())
|
||||
}
|
||||
@@ -1107,13 +1145,7 @@ mod fs_tests {
|
||||
storage.upload(body, len, &path, None, &cancel).await?;
|
||||
}
|
||||
|
||||
let read = aggregate(
|
||||
storage
|
||||
.download(&path, &DownloadOpts::default(), &cancel)
|
||||
.await?
|
||||
.download_stream,
|
||||
)
|
||||
.await?;
|
||||
let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?;
|
||||
assert_eq!(body, read);
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -28,13 +28,12 @@ use aws_sdk_s3::{
|
||||
Client,
|
||||
};
|
||||
use aws_smithy_async::rt::sleep::TokioSleep;
|
||||
use http_types::StatusCode;
|
||||
|
||||
use aws_smithy_types::{body::SdkBody, DateTime};
|
||||
use aws_smithy_types::{byte_stream::ByteStream, date_time::ConversionError};
|
||||
use bytes::Bytes;
|
||||
use futures::stream::Stream;
|
||||
use hyper0::Body;
|
||||
use hyper::Body;
|
||||
use scopeguard::ScopeGuard;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use utils::backoff;
|
||||
@@ -45,8 +44,8 @@ use crate::{
|
||||
error::Cancelled,
|
||||
metrics::{start_counting_cancelled_wait, start_measuring_requests},
|
||||
support::PermitCarrying,
|
||||
ConcurrencyLimiter, Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject,
|
||||
RemotePath, RemoteStorage, TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE,
|
||||
ConcurrencyLimiter, Download, DownloadError, Listing, ListingMode, ListingObject, RemotePath,
|
||||
RemoteStorage, TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE,
|
||||
REMOTE_STORAGE_PREFIX_SEPARATOR,
|
||||
};
|
||||
|
||||
@@ -68,7 +67,6 @@ pub struct S3Bucket {
|
||||
struct GetObjectRequest {
|
||||
bucket: String,
|
||||
key: String,
|
||||
etag: Option<String>,
|
||||
range: Option<String>,
|
||||
}
|
||||
impl S3Bucket {
|
||||
@@ -250,18 +248,13 @@ impl S3Bucket {
|
||||
|
||||
let started_at = start_measuring_requests(kind);
|
||||
|
||||
let mut builder = self
|
||||
let get_object = self
|
||||
.client
|
||||
.get_object()
|
||||
.bucket(request.bucket)
|
||||
.key(request.key)
|
||||
.set_range(request.range);
|
||||
|
||||
if let Some(etag) = request.etag {
|
||||
builder = builder.if_none_match(etag);
|
||||
}
|
||||
|
||||
let get_object = builder.send();
|
||||
.set_range(request.range)
|
||||
.send();
|
||||
|
||||
let get_object = tokio::select! {
|
||||
res = get_object => res,
|
||||
@@ -284,20 +277,6 @@ impl S3Bucket {
|
||||
);
|
||||
return Err(DownloadError::NotFound);
|
||||
}
|
||||
Err(SdkError::ServiceError(e))
|
||||
// aws_smithy_runtime_api::http::response::StatusCode isn't
|
||||
// re-exported by any aws crates, so just check the numeric
|
||||
// status against http_types::StatusCode instead of pulling it.
|
||||
if e.raw().status().as_u16() == StatusCode::NotModified =>
|
||||
{
|
||||
// Count an unmodified file as a success.
|
||||
crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
|
||||
kind,
|
||||
AttemptOutcome::Ok,
|
||||
started_at,
|
||||
);
|
||||
return Err(DownloadError::Unmodified);
|
||||
}
|
||||
Err(e) => {
|
||||
crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
|
||||
kind,
|
||||
@@ -794,7 +773,6 @@ impl RemoteStorage for S3Bucket {
|
||||
async fn download(
|
||||
&self,
|
||||
from: &RemotePath,
|
||||
opts: &DownloadOpts,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<Download, DownloadError> {
|
||||
// if prefix is not none then download file `prefix/from`
|
||||
@@ -803,8 +781,33 @@ impl RemoteStorage for S3Bucket {
|
||||
GetObjectRequest {
|
||||
bucket: self.bucket_name.clone(),
|
||||
key: self.relative_path_to_s3_object(from),
|
||||
etag: opts.etag.as_ref().map(|e| e.to_string()),
|
||||
range: opts.byte_range_header(),
|
||||
range: None,
|
||||
},
|
||||
cancel,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn download_byte_range(
&self,
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
// S3 accepts ranges as https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
// and needs both ends to be inclusive
let end_inclusive = end_exclusive.map(|end| end.saturating_sub(1));
let range = Some(match end_inclusive {
Some(end_inclusive) => format!("bytes={start_inclusive}-{end_inclusive}"),
None => format!("bytes={start_inclusive}-"),
});

self.download_object(
GetObjectRequest {
bucket: self.bucket_name.clone(),
key: self.relative_path_to_s3_object(from),
range,
},
cancel,
)
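In other words, the half-open `[start_inclusive, end_exclusive)` interval used internally is rewritten into RFC 2616's doubly-inclusive `bytes=first-last` form. A small standalone sketch of just that conversion (not the S3 client code itself):

// Sketch: exclusive-end byte range -> RFC 2616 Range header value.
fn range_header(start_inclusive: u64, end_exclusive: Option<u64>) -> String {
    match end_exclusive.map(|end| end.saturating_sub(1)) {
        Some(end_inclusive) => format!("bytes={start_inclusive}-{end_inclusive}"),
        None => format!("bytes={start_inclusive}-"),
    }
}

fn main() {
    assert_eq!(range_header(0, Some(4096)), "bytes=0-4095"); // first 4 KiB
    assert_eq!(range_header(13, None), "bytes=13-");         // suffix from offset 13
    println!("range header conversion checks passed");
}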
|
||||
|
||||
@@ -12,8 +12,8 @@ use std::{collections::hash_map::Entry, sync::Arc};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use crate::{
|
||||
Download, DownloadError, DownloadOpts, GenericRemoteStorage, Listing, ListingMode, RemotePath,
|
||||
RemoteStorage, StorageMetadata, TimeTravelError,
|
||||
Download, DownloadError, GenericRemoteStorage, Listing, ListingMode, RemotePath, RemoteStorage,
|
||||
StorageMetadata, TimeTravelError,
|
||||
};
|
||||
|
||||
pub struct UnreliableWrapper {
|
||||
@@ -167,14 +167,28 @@ impl RemoteStorage for UnreliableWrapper {
|
||||
async fn download(
|
||||
&self,
|
||||
from: &RemotePath,
|
||||
opts: &DownloadOpts,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<Download, DownloadError> {
|
||||
// Note: We treat any byte range as an "attempt" of the same operation.
|
||||
// We don't pay attention to the ranges. That's good enough for now.
|
||||
self.attempt(RemoteOp::Download(from.clone()))
|
||||
.map_err(DownloadError::Other)?;
|
||||
self.inner.download(from, opts, cancel).await
|
||||
self.inner.download(from, cancel).await
|
||||
}
|
||||
|
||||
async fn download_byte_range(
|
||||
&self,
|
||||
from: &RemotePath,
|
||||
start_inclusive: u64,
|
||||
end_exclusive: Option<u64>,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<Download, DownloadError> {
|
||||
// Note: We treat any download_byte_range as an "attempt" of the same
|
||||
// operation. We don't pay attention to the ranges. That's good enough
|
||||
// for now.
|
||||
self.attempt(RemoteOp::Download(from.clone()))
|
||||
.map_err(DownloadError::Other)?;
|
||||
self.inner
|
||||
.download_byte_range(from, start_inclusive, end_exclusive, cancel)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn delete(&self, path: &RemotePath, cancel: &CancellationToken) -> anyhow::Result<()> {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
use anyhow::Context;
|
||||
use camino::Utf8Path;
|
||||
use futures::StreamExt;
|
||||
use remote_storage::{DownloadError, DownloadOpts, ListingMode, ListingObject, RemotePath};
|
||||
use std::ops::Bound;
|
||||
use remote_storage::ListingMode;
|
||||
use remote_storage::RemotePath;
|
||||
use std::sync::Arc;
|
||||
use std::{collections::HashSet, num::NonZeroU32};
|
||||
use test_context::test_context;
|
||||
@@ -284,25 +284,14 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
|
||||
ctx.client.upload(data, len, &path, None, &cancel).await?;
|
||||
|
||||
// Normal download request
|
||||
let dl = ctx
|
||||
.client
|
||||
.download(&path, &DownloadOpts::default(), &cancel)
|
||||
.await?;
|
||||
let dl = ctx.client.download(&path, &cancel).await?;
|
||||
let buf = download_to_vec(dl).await?;
|
||||
assert_eq!(&buf, &orig);
|
||||
|
||||
// Full range (end specified)
|
||||
let dl = ctx
|
||||
.client
|
||||
.download(
|
||||
&path,
|
||||
&DownloadOpts {
|
||||
byte_start: Bound::Included(0),
|
||||
byte_end: Bound::Excluded(len as u64),
|
||||
..Default::default()
|
||||
},
|
||||
&cancel,
|
||||
)
|
||||
.download_byte_range(&path, 0, Some(len as u64), &cancel)
|
||||
.await?;
|
||||
let buf = download_to_vec(dl).await?;
|
||||
assert_eq!(&buf, &orig);
|
||||
@@ -310,15 +299,7 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
|
||||
// partial range (end specified)
|
||||
let dl = ctx
|
||||
.client
|
||||
.download(
|
||||
&path,
|
||||
&DownloadOpts {
|
||||
byte_start: Bound::Included(4),
|
||||
byte_end: Bound::Excluded(10),
|
||||
..Default::default()
|
||||
},
|
||||
&cancel,
|
||||
)
|
||||
.download_byte_range(&path, 4, Some(10), &cancel)
|
||||
.await?;
|
||||
let buf = download_to_vec(dl).await?;
|
||||
assert_eq!(&buf, &orig[4..10]);
|
||||
@@ -326,15 +307,7 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
|
||||
// partial range (end beyond real end)
|
||||
let dl = ctx
|
||||
.client
|
||||
.download(
|
||||
&path,
|
||||
&DownloadOpts {
|
||||
byte_start: Bound::Included(8),
|
||||
byte_end: Bound::Excluded(len as u64 * 100),
|
||||
..Default::default()
|
||||
},
|
||||
&cancel,
|
||||
)
|
||||
.download_byte_range(&path, 8, Some(len as u64 * 100), &cancel)
|
||||
.await?;
|
||||
let buf = download_to_vec(dl).await?;
|
||||
assert_eq!(&buf, &orig[8..]);
|
||||
@@ -342,14 +315,7 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
|
||||
// Partial range (end unspecified)
|
||||
let dl = ctx
|
||||
.client
|
||||
.download(
|
||||
&path,
|
||||
&DownloadOpts {
|
||||
byte_start: Bound::Included(4),
|
||||
..Default::default()
|
||||
},
|
||||
&cancel,
|
||||
)
|
||||
.download_byte_range(&path, 4, None, &cancel)
|
||||
.await?;
|
||||
let buf = download_to_vec(dl).await?;
|
||||
assert_eq!(&buf, &orig[4..]);
|
||||
@@ -357,14 +323,7 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
|
||||
// Full range (end unspecified)
|
||||
let dl = ctx
|
||||
.client
|
||||
.download(
|
||||
&path,
|
||||
&DownloadOpts {
|
||||
byte_start: Bound::Included(0),
|
||||
..Default::default()
|
||||
},
|
||||
&cancel,
|
||||
)
|
||||
.download_byte_range(&path, 0, None, &cancel)
|
||||
.await?;
|
||||
let buf = download_to_vec(dl).await?;
|
||||
assert_eq!(&buf, &orig);
|
||||
@@ -378,54 +337,6 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Tests that conditional downloads work properly, by returning
|
||||
/// DownloadError::Unmodified when the object ETag matches the given ETag.
|
||||
#[test_context(MaybeEnabledStorage)]
|
||||
#[tokio::test]
|
||||
async fn download_conditional(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
|
||||
let MaybeEnabledStorage::Enabled(ctx) = ctx else {
|
||||
return Ok(());
|
||||
};
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
// Create a file.
|
||||
let path = RemotePath::new(Utf8Path::new(format!("{}/file", ctx.base_prefix).as_str()))?;
|
||||
let data = bytes::Bytes::from_static("foo".as_bytes());
|
||||
let (stream, len) = wrap_stream(data);
|
||||
ctx.client.upload(stream, len, &path, None, &cancel).await?;
|
||||
|
||||
// Download it to obtain its etag.
|
||||
let mut opts = DownloadOpts::default();
|
||||
let download = ctx.client.download(&path, &opts, &cancel).await?;
|
||||
|
||||
// Download with the etag yields DownloadError::Unmodified.
|
||||
opts.etag = Some(download.etag);
|
||||
let result = ctx.client.download(&path, &opts, &cancel).await;
|
||||
assert!(
|
||||
matches!(result, Err(DownloadError::Unmodified)),
|
||||
"expected DownloadError::Unmodified, got {result:?}"
|
||||
);
|
||||
|
||||
// Replace the file contents.
|
||||
let data = bytes::Bytes::from_static("bar".as_bytes());
|
||||
let (stream, len) = wrap_stream(data);
|
||||
ctx.client.upload(stream, len, &path, None, &cancel).await?;
|
||||
|
||||
// A download with the old etag should yield the new file.
|
||||
let download = ctx.client.download(&path, &opts, &cancel).await?;
|
||||
assert_ne!(download.etag, opts.etag.unwrap(), "ETag did not change");
|
||||
|
||||
// A download with the new etag should yield Unmodified again.
|
||||
opts.etag = Some(download.etag);
|
||||
let result = ctx.client.download(&path, &opts, &cancel).await;
|
||||
assert!(
|
||||
matches!(result, Err(DownloadError::Unmodified)),
|
||||
"expected DownloadError::Unmodified, got {result:?}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test_context(MaybeEnabledStorage)]
|
||||
#[tokio::test]
|
||||
async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
|
||||
@@ -453,10 +364,7 @@ async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
|
||||
// Normal download request
|
||||
ctx.client.copy_object(&path, &path_dest, &cancel).await?;
|
||||
|
||||
let dl = ctx
|
||||
.client
|
||||
.download(&path_dest, &DownloadOpts::default(), &cancel)
|
||||
.await?;
|
||||
let dl = ctx.client.download(&path_dest, &cancel).await?;
|
||||
let buf = download_to_vec(dl).await?;
|
||||
assert_eq!(&buf, &orig);
|
||||
|
||||
@@ -468,56 +376,3 @@ async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Tests that head_object works properly.
|
||||
#[test_context(MaybeEnabledStorage)]
|
||||
#[tokio::test]
|
||||
async fn head_object(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
|
||||
let MaybeEnabledStorage::Enabled(ctx) = ctx else {
|
||||
return Ok(());
|
||||
};
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
let path = RemotePath::new(Utf8Path::new(format!("{}/file", ctx.base_prefix).as_str()))?;
|
||||
|
||||
// Errors on missing file.
|
||||
let result = ctx.client.head_object(&path, &cancel).await;
|
||||
assert!(
|
||||
matches!(result, Err(DownloadError::NotFound)),
|
||||
"expected NotFound, got {result:?}"
|
||||
);
|
||||
|
||||
// Create the file.
|
||||
let data = bytes::Bytes::from_static("foo".as_bytes());
|
||||
let (stream, len) = wrap_stream(data);
|
||||
ctx.client.upload(stream, len, &path, None, &cancel).await?;
|
||||
|
||||
// Fetch the head metadata.
|
||||
let object = ctx.client.head_object(&path, &cancel).await?;
|
||||
assert_eq!(
|
||||
object,
|
||||
ListingObject {
|
||||
key: path.clone(),
|
||||
last_modified: object.last_modified, // ignore
|
||||
size: 3
|
||||
}
|
||||
);
|
||||
|
||||
// Wait for a couple of seconds, and then update the file to check the last
|
||||
// modified timestamp.
|
||||
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
|
||||
|
||||
let data = bytes::Bytes::from_static("bar".as_bytes());
|
||||
let (stream, len) = wrap_stream(data);
|
||||
ctx.client.upload(stream, len, &path, None, &cancel).await?;
|
||||
let new = ctx.client.head_object(&path, &cancel).await?;
|
||||
|
||||
assert!(
|
||||
!new.last_modified
|
||||
.duration_since(object.last_modified)?
|
||||
.is_zero(),
|
||||
"last_modified did not advance"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -12,8 +12,8 @@ use anyhow::Context;
|
||||
use camino::Utf8Path;
|
||||
use futures_util::StreamExt;
|
||||
use remote_storage::{
|
||||
DownloadError, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath,
|
||||
RemoteStorageConfig, RemoteStorageKind, S3Config,
|
||||
DownloadError, GenericRemoteStorage, ListingMode, RemotePath, RemoteStorageConfig,
|
||||
RemoteStorageKind, S3Config,
|
||||
};
|
||||
use test_context::test_context;
|
||||
use test_context::AsyncTestContext;
|
||||
@@ -121,8 +121,7 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
|
||||
|
||||
// A little check to ensure that our clock is not too far off from the S3 clock
|
||||
{
|
||||
let opts = DownloadOpts::default();
|
||||
let dl = retry(|| ctx.client.download(&path2, &opts, &cancel)).await?;
|
||||
let dl = retry(|| ctx.client.download(&path2, &cancel)).await?;
|
||||
let last_modified = dl.last_modified;
|
||||
let half_wt = WAIT_TIME.mul_f32(0.5);
|
||||
let t0_hwt = t0 + half_wt;
|
||||
@@ -160,12 +159,7 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
|
||||
let t2_files_recovered = list_files(&ctx.client, &cancel).await?;
|
||||
println!("after recovery to t2: {t2_files_recovered:?}");
|
||||
assert_eq!(t2_files, t2_files_recovered);
|
||||
let path2_recovered_t2 = download_to_vec(
|
||||
ctx.client
|
||||
.download(&path2, &DownloadOpts::default(), &cancel)
|
||||
.await?,
|
||||
)
|
||||
.await?;
|
||||
let path2_recovered_t2 = download_to_vec(ctx.client.download(&path2, &cancel).await?).await?;
|
||||
assert_eq!(path2_recovered_t2, new_data.as_bytes());
|
||||
|
||||
// after recovery to t1: path1 is back, path2 has the old content
|
||||
@@ -176,12 +170,7 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
|
||||
let t1_files_recovered = list_files(&ctx.client, &cancel).await?;
|
||||
println!("after recovery to t1: {t1_files_recovered:?}");
|
||||
assert_eq!(t1_files, t1_files_recovered);
|
||||
let path2_recovered_t1 = download_to_vec(
|
||||
ctx.client
|
||||
.download(&path2, &DownloadOpts::default(), &cancel)
|
||||
.await?,
|
||||
)
|
||||
.await?;
|
||||
let path2_recovered_t1 = download_to_vec(ctx.client.download(&path2, &cancel).await?).await?;
|
||||
assert_eq!(path2_recovered_t1, old_data.as_bytes());
|
||||
|
||||
// after recovery to t0: everything is gone except for path1
|
||||
@@ -427,7 +416,7 @@ async fn download_is_timeouted(ctx: &mut MaybeEnabledStorage) {
|
||||
let started_at = std::time::Instant::now();
|
||||
let mut stream = ctx
|
||||
.client
|
||||
.download(&path, &DownloadOpts::default(), &cancel)
|
||||
.download(&path, &cancel)
|
||||
.await
|
||||
.expect("download succeeds")
|
||||
.download_stream;
|
||||
@@ -502,7 +491,7 @@ async fn download_is_cancelled(ctx: &mut MaybeEnabledStorage) {
|
||||
{
|
||||
let stream = ctx
|
||||
.client
|
||||
.download(&path, &DownloadOpts::default(), &cancel)
|
||||
.download(&path, &cancel)
|
||||
.await
|
||||
.expect("download succeeds")
|
||||
.download_stream;
|
||||
|
||||
@@ -5,7 +5,7 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
hyper0.workspace = true
|
||||
hyper.workspace = true
|
||||
opentelemetry = { workspace = true, features = ["trace"] }
|
||||
opentelemetry_sdk = { workspace = true, features = ["rt-tokio"] }
|
||||
opentelemetry-otlp = { workspace = true, default-features = false, features = ["http-proto", "trace", "http", "reqwest-client"] }
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
//! Tracing wrapper for Hyper HTTP server
|
||||
|
||||
use hyper0::HeaderMap;
|
||||
use hyper0::{Body, Request, Response};
|
||||
use hyper::HeaderMap;
|
||||
use hyper::{Body, Request, Response};
|
||||
use std::future::Future;
|
||||
use tracing::Instrument;
|
||||
use tracing_opentelemetry::OpenTelemetrySpanExt;
|
||||
|
||||
@@ -22,7 +22,7 @@ chrono.workspace = true
|
||||
git-version.workspace = true
|
||||
hex = { workspace = true, features = ["serde"] }
|
||||
humantime.workspace = true
|
||||
hyper0 = { workspace = true, features = ["full"] }
|
||||
hyper = { workspace = true, features = ["full"] }
|
||||
fail.workspace = true
|
||||
futures = { workspace = true}
|
||||
jsonwebtoken.workspace = true
|
||||
|
||||
@@ -31,12 +31,9 @@ pub enum Scope {
|
||||
/// The scope used by pageservers in upcalls to storage controller and cloud control plane
|
||||
#[serde(rename = "generations_api")]
|
||||
GenerationsApi,
|
||||
/// Allows access to control plane management API and all storage controller endpoints.
/// Allows access to control plane management API and some storage controller endpoints.
|
||||
Admin,
|
||||
|
||||
/// Allows access to control plane & storage controller endpoints used in infrastructure automation (e.g. node registration)
|
||||
Infra,
|
||||
|
||||
/// Allows access to storage controller APIs used by the scrubber, to interrogate the state
|
||||
/// of a tenant & post scrub results.
|
||||
Scrubber,
|
||||
|
||||
@@ -2,8 +2,6 @@
|
||||
//! between other crates in this repository.
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
|
||||
extern crate hyper0 as hyper;
|
||||
|
||||
pub mod backoff;
|
||||
|
||||
/// `Lsn` type implements common tasks on Log Sequence Numbers
|
||||
|
||||
@@ -7,13 +7,11 @@ use axum::{
|
||||
extract::{ws::WebSocket, State, WebSocketUpgrade},
|
||||
response::Response,
|
||||
};
|
||||
use axum::{routing::get, Router};
|
||||
use axum::{routing::get, Router, Server};
|
||||
use clap::Parser;
|
||||
use futures::Future;
|
||||
use std::net::SocketAddr;
|
||||
use std::{fmt::Debug, time::Duration};
|
||||
use sysinfo::{RefreshKind, System, SystemExt};
|
||||
use tokio::net::TcpListener;
|
||||
use tokio::{sync::broadcast, task::JoinHandle};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{error, info};
|
||||
@@ -134,14 +132,14 @@ pub async fn start(args: &'static Args, token: CancellationToken) -> anyhow::Res
|
||||
args,
|
||||
});
|
||||
|
||||
let addr_str = args.addr();
|
||||
let addr: SocketAddr = addr_str.parse().expect("parsing address should not fail");
|
||||
|
||||
let listener = TcpListener::bind(&addr)
|
||||
.await
|
||||
let addr = args.addr();
|
||||
let bound = Server::try_bind(&addr.parse().expect("parsing address should not fail"))
|
||||
.with_context(|| format!("failed to bind to {addr}"))?;
|
||||
info!(addr_str, "server bound");
|
||||
axum::serve(listener, app.into_make_service())
|
||||
|
||||
info!(addr, "server bound");
|
||||
|
||||
bound
|
||||
.serve(app.into_make_service())
|
||||
.await
|
||||
.context("server exited")?;
|
||||
|
||||
|
||||
@@ -79,7 +79,8 @@ pub struct Config {
|
||||
/// memory.
|
||||
///
|
||||
/// The default value of `0.15` means that we *guarantee* sending upscale requests if the
|
||||
/// cgroup is using more than 85% of total memory.
|
||||
/// cgroup is using more than 85% of total memory (even if we're *not* separately reserving
|
||||
/// memory for the file cache).
|
||||
cgroup_min_overhead_fraction: f64,
|
||||
|
||||
cgroup_downscale_threshold_buffer_bytes: u64,
|
||||
@@ -96,12 +97,24 @@ impl Default for Config {
|
||||
}
|
||||
|
||||
impl Config {
fn cgroup_threshold(&self, total_mem: u64) -> u64 {
// We want our threshold to be met gracefully instead of letting postgres get OOM-killed
// (or if there's room, spilling to swap).
fn cgroup_threshold(&self, total_mem: u64, file_cache_disk_size: u64) -> u64 {
// If the file cache is in tmpfs, then it will count towards shmem usage of the cgroup,
// and thus be non-reclaimable, so we should allow for additional memory usage.
//
// If the file cache sits on disk, our desired stable system state is for it to be fully
// page cached (its contents should only be paged to/from disk in situations where we can't
// upscale fast enough). Page-cached memory is reclaimable, so we need to lower the
// threshold for non-reclaimable memory so we scale up *before* the kernel starts paging
// out the file cache.
let memory_remaining_for_cgroup = total_mem.saturating_sub(file_cache_disk_size);

// Even if we're not separately making room for the file cache (if it's in tmpfs), we still
// want our threshold to be met gracefully instead of letting postgres get OOM-killed.
// So we guarantee that there's at least `cgroup_min_overhead_fraction` of total memory
// remaining above the threshold.
(total_mem as f64 * (1.0 - self.cgroup_min_overhead_fraction)) as u64
let max_threshold = (total_mem as f64 * (1.0 - self.cgroup_min_overhead_fraction)) as u64;

memory_remaining_for_cgroup.min(max_threshold)
}
}
|
||||
|
||||
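To make the threshold arithmetic above concrete, here is a small, self-contained sketch. It uses a hypothetical free function rather than the real `Config` method, and the 0.15 overhead fraction and byte figures are only illustrative:

    /// Keep at least `min_overhead_fraction` of total memory free above the threshold,
    /// and additionally reserve room for an on-disk file cache to stay fully page-cached.
    fn cgroup_threshold(total_mem: u64, file_cache_disk_size: u64, min_overhead_fraction: f64) -> u64 {
        let memory_remaining_for_cgroup = total_mem.saturating_sub(file_cache_disk_size);
        let max_threshold = (total_mem as f64 * (1.0 - min_overhead_fraction)) as u64;
        memory_remaining_for_cgroup.min(max_threshold)
    }

    fn main() {
        let gib: u64 = 1 << 30;
        // 4 GiB total, 1 GiB file cache on disk, 15% guaranteed overhead:
        // min(4 GiB - 1 GiB, 4 GiB * 0.85) = 3 GiB, so the file-cache term wins.
        assert_eq!(cgroup_threshold(4 * gib, gib, 0.15), 3 * gib);
        // With the file cache in tmpfs there is no disk-backed portion to subtract,
        // and only the overhead cap (about 3.4 GiB here) applies.
        assert!(cgroup_threshold(4 * gib, 0, 0.15) > 3 * gib);
    }

The later hunks in this file then recompute the threshold with the actual file cache size after every resize, which is why `file_cache_disk_size` is threaded through the runner.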
@@ -136,6 +149,11 @@ impl Runner {
|
||||
|
||||
let mem = get_total_system_memory();
|
||||
|
||||
let mut file_cache_disk_size = 0;
|
||||
|
||||
// We need to process file cache initialization before cgroup initialization, so that the memory
|
||||
// allocated to the file cache is appropriately taken into account when we decide the cgroup's
|
||||
// memory limits.
|
||||
if let Some(connstr) = &args.pgconnstr {
|
||||
info!("initializing file cache");
|
||||
let config = FileCacheConfig::default();
|
||||
@@ -166,6 +184,7 @@ impl Runner {
|
||||
info!("file cache size actually got set to {actual_size}")
|
||||
}
|
||||
|
||||
file_cache_disk_size = actual_size;
|
||||
state.filecache = Some(file_cache);
|
||||
}
|
||||
|
||||
@@ -188,7 +207,7 @@ impl Runner {
|
||||
cgroup.watch(hist_tx).await
|
||||
});
|
||||
|
||||
let threshold = state.config.cgroup_threshold(mem);
|
||||
let threshold = state.config.cgroup_threshold(mem, file_cache_disk_size);
|
||||
info!(threshold, "set initial cgroup threshold",);
|
||||
|
||||
state.cgroup = Some(CgroupState {
|
||||
@@ -240,7 +259,9 @@ impl Runner {
|
||||
return Ok((false, status.to_owned()));
|
||||
}
|
||||
|
||||
let new_threshold = self.config.cgroup_threshold(usable_system_memory);
|
||||
let new_threshold = self
|
||||
.config
|
||||
.cgroup_threshold(usable_system_memory, expected_file_cache_size);
|
||||
|
||||
let current = last_history.avg_non_reclaimable;
|
||||
|
||||
@@ -261,11 +282,13 @@ impl Runner {
|
||||
|
||||
// The downscaling has been approved. Downscale the file cache, then the cgroup.
|
||||
let mut status = vec![];
|
||||
let mut file_cache_disk_size = 0;
|
||||
if let Some(file_cache) = &mut self.filecache {
|
||||
let actual_usage = file_cache
|
||||
.set_file_cache_size(expected_file_cache_size)
|
||||
.await
|
||||
.context("failed to set file cache size")?;
|
||||
file_cache_disk_size = actual_usage;
|
||||
let message = format!(
|
||||
"set file cache size to {} MiB",
|
||||
bytes_to_mebibytes(actual_usage),
|
||||
@@ -275,7 +298,9 @@ impl Runner {
|
||||
}
|
||||
|
||||
if let Some(cgroup) = &mut self.cgroup {
|
||||
let new_threshold = self.config.cgroup_threshold(usable_system_memory);
|
||||
let new_threshold = self
|
||||
.config
|
||||
.cgroup_threshold(usable_system_memory, file_cache_disk_size);
|
||||
|
||||
let message = format!(
|
||||
"set cgroup memory threshold from {} MiB to {} MiB, of new total {} MiB",
|
||||
@@ -304,6 +329,7 @@ impl Runner {
|
||||
let new_mem = resources.mem;
|
||||
let usable_system_memory = new_mem.saturating_sub(self.config.sys_buffer_bytes);
|
||||
|
||||
let mut file_cache_disk_size = 0;
|
||||
if let Some(file_cache) = &mut self.filecache {
|
||||
let expected_usage = file_cache.config.calculate_cache_size(usable_system_memory);
|
||||
info!(
|
||||
@@ -316,6 +342,7 @@ impl Runner {
|
||||
.set_file_cache_size(expected_usage)
|
||||
.await
|
||||
.context("failed to set file cache size")?;
|
||||
file_cache_disk_size = actual_usage;
|
||||
|
||||
if actual_usage != expected_usage {
|
||||
warn!(
|
||||
@@ -327,7 +354,9 @@ impl Runner {
|
||||
}
|
||||
|
||||
if let Some(cgroup) = &mut self.cgroup {
|
||||
let new_threshold = self.config.cgroup_threshold(usable_system_memory);
|
||||
let new_threshold = self
|
||||
.config
|
||||
.cgroup_threshold(usable_system_memory, file_cache_disk_size);
|
||||
|
||||
info!(
|
||||
"set cgroup memory threshold from {} MiB to {} MiB of new total {} MiB",
|
||||
|
||||
@@ -30,7 +30,7 @@ futures.workspace = true
hex.workspace = true
humantime.workspace = true
humantime-serde.workspace = true
hyper0.workspace = true
hyper.workspace = true
itertools.workspace = true
md5.workspace = true
nix.workspace = true

@@ -164,7 +164,11 @@ fn criterion_benchmark(c: &mut Criterion) {
|
||||
let conf: &'static PageServerConf = Box::leak(Box::new(
|
||||
pageserver::config::PageServerConf::dummy_conf(temp_dir.path().to_path_buf()),
|
||||
));
|
||||
virtual_file::init(16384, virtual_file::io_engine_for_bench());
|
||||
virtual_file::init(
|
||||
16384,
|
||||
virtual_file::io_engine_for_bench(),
|
||||
pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT,
|
||||
);
|
||||
page_cache::init(conf.page_cache_size);
|
||||
|
||||
{
|
||||
|
||||
@@ -540,13 +540,10 @@ impl Client {
|
||||
.map_err(Error::ReceiveBody)
|
||||
}
|
||||
|
||||
/// Configs io mode at runtime.
|
||||
pub async fn put_io_mode(
|
||||
&self,
|
||||
mode: &pageserver_api::models::virtual_file::IoMode,
|
||||
) -> Result<()> {
|
||||
let uri = format!("{}/v1/io_mode", self.mgmt_api_endpoint);
|
||||
self.request(Method::PUT, uri, mode)
|
||||
/// Configs io buffer alignment at runtime.
|
||||
pub async fn put_io_alignment(&self, align: usize) -> Result<()> {
|
||||
let uri = format!("{}/v1/io_alignment", self.mgmt_api_endpoint);
|
||||
self.request(Method::PUT, uri, align)
|
||||
.await?
|
||||
.json()
|
||||
.await
|
||||
|
||||
@@ -152,7 +152,11 @@ pub(crate) async fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> {
|
||||
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
|
||||
|
||||
// Initialize virtual_file (file desriptor cache) and page cache which are needed to access layer persistent B-Tree.
|
||||
pageserver::virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
|
||||
pageserver::virtual_file::init(
|
||||
10,
|
||||
virtual_file::api::IoEngineKind::StdFs,
|
||||
pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT,
|
||||
);
|
||||
pageserver::page_cache::init(100);
|
||||
|
||||
let mut total_delta_layers = 0usize;
|
||||
|
||||
@@ -59,7 +59,7 @@ pub(crate) enum LayerCmd {
|
||||
|
||||
async fn read_delta_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result<()> {
|
||||
let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path");
|
||||
virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
|
||||
virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs, 1);
|
||||
page_cache::init(100);
|
||||
let file = VirtualFile::open(path, ctx).await?;
|
||||
let file_id = page_cache::next_file_id();
|
||||
@@ -190,7 +190,11 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
|
||||
new_tenant_id,
|
||||
new_timeline_id,
|
||||
} => {
|
||||
pageserver::virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
|
||||
pageserver::virtual_file::init(
|
||||
10,
|
||||
virtual_file::api::IoEngineKind::StdFs,
|
||||
pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT,
|
||||
);
|
||||
pageserver::page_cache::init(100);
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
|
||||
|
||||
@@ -26,7 +26,7 @@ use pageserver::{
|
||||
tenant::{dump_layerfile_from_path, metadata::TimelineMetadata},
|
||||
virtual_file,
|
||||
};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_api::{config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT, shard::TenantShardId};
|
||||
use postgres_ffi::ControlFileData;
|
||||
use remote_storage::{RemotePath, RemoteStorageConfig};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
@@ -205,7 +205,11 @@ fn read_pg_control_file(control_file_path: &Utf8Path) -> anyhow::Result<()> {
|
||||
|
||||
async fn print_layerfile(path: &Utf8Path) -> anyhow::Result<()> {
|
||||
// Basic initialization of things that don't change after startup
|
||||
virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
|
||||
virtual_file::init(
|
||||
10,
|
||||
virtual_file::api::IoEngineKind::StdFs,
|
||||
DEFAULT_IO_BUFFER_ALIGNMENT,
|
||||
);
|
||||
page_cache::init(100);
|
||||
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
|
||||
dump_layerfile_from_path(path, true, &ctx).await
|
||||
|
||||
@@ -59,9 +59,9 @@ pub(crate) struct Args {
|
||||
#[clap(long)]
|
||||
set_io_engine: Option<pageserver_api::models::virtual_file::IoEngineKind>,
|
||||
|
||||
/// Before starting the benchmark, live-reconfigure the pageserver to use specified io mode (buffered vs. direct).
|
||||
/// Before starting the benchmark, live-reconfigure the pageserver to use specified alignment for io buffers.
|
||||
#[clap(long)]
|
||||
set_io_mode: Option<pageserver_api::models::virtual_file::IoMode>,
|
||||
set_io_alignment: Option<usize>,
|
||||
|
||||
targets: Option<Vec<TenantTimelineId>>,
|
||||
}
|
||||
@@ -129,8 +129,8 @@ async fn main_impl(
|
||||
mgmt_api_client.put_io_engine(engine_str).await?;
|
||||
}
|
||||
|
||||
if let Some(mode) = &args.set_io_mode {
|
||||
mgmt_api_client.put_io_mode(mode).await?;
|
||||
if let Some(align) = args.set_io_alignment {
|
||||
mgmt_api_client.put_io_alignment(align).await?;
|
||||
}
|
||||
|
||||
// discover targets
|
||||
|
||||
@@ -14,19 +14,14 @@ pub fn check_permission(claims: &Claims, tenant_id: Option<TenantId>) -> Result<
|
||||
}
|
||||
(Scope::PageServerApi, None) => Ok(()), // access to management api for PageServerApi scope
|
||||
(Scope::PageServerApi, Some(_)) => Ok(()), // access to tenant api using PageServerApi scope
|
||||
(
|
||||
Scope::Admin
|
||||
| Scope::SafekeeperData
|
||||
| Scope::GenerationsApi
|
||||
| Scope::Infra
|
||||
| Scope::Scrubber,
|
||||
_,
|
||||
) => Err(AuthError(
|
||||
format!(
|
||||
"JWT scope '{:?}' is ineligible for Pageserver auth",
|
||||
claims.scope
|
||||
)
|
||||
.into(),
|
||||
)),
|
||||
(Scope::Admin | Scope::SafekeeperData | Scope::GenerationsApi | Scope::Scrubber, _) => {
|
||||
Err(AuthError(
|
||||
format!(
|
||||
"JWT scope '{:?}' is ineligible for Pageserver auth",
|
||||
claims.scope
|
||||
)
|
||||
.into(),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -125,7 +125,8 @@ fn main() -> anyhow::Result<()> {

    // after setting up logging, log the effective IO engine choice and read path implementations
    info!(?conf.virtual_file_io_engine, "starting with virtual_file IO engine");
    info!(?conf.virtual_file_io_mode, "starting with virtual_file IO mode");
    info!(?conf.virtual_file_direct_io, "starting with virtual_file Direct IO settings");
    info!(?conf.io_buffer_alignment, "starting with setting for IO buffer alignment");

    // The tenants directory contains all the pageserver local disk state.
    // Create if not exists and make sure all the contents are durable before proceeding.
@@ -167,7 +168,11 @@ fn main() -> anyhow::Result<()> {
    let scenario = failpoint_support::init();

    // Basic initialization of things that don't change after startup
    virtual_file::init(conf.max_file_descriptors, conf.virtual_file_io_engine);
    virtual_file::init(
        conf.max_file_descriptors,
        conf.virtual_file_io_engine,
        conf.io_buffer_alignment,
    );
    page_cache::init(conf.page_cache_size);

    start_pageserver(launch_ts, conf).context("Failed to start pageserver")?;
@@ -570,7 +575,7 @@ fn start_pageserver(
|
||||
.build()
|
||||
.map_err(|err| anyhow!(err))?;
|
||||
let service = utils::http::RouterService::new(router).unwrap();
|
||||
let server = hyper0::Server::from_tcp(http_listener)?
|
||||
let server = hyper::Server::from_tcp(http_listener)?
|
||||
.serve(service)
|
||||
.with_graceful_shutdown({
|
||||
let cancel = cancel.clone();
|
||||
|
||||
@@ -174,7 +174,9 @@ pub struct PageServerConf {
|
||||
pub l0_flush: crate::l0_flush::L0FlushConfig,
|
||||
|
||||
/// Direct IO settings
|
||||
pub virtual_file_io_mode: virtual_file::IoMode,
|
||||
pub virtual_file_direct_io: virtual_file::DirectIoMode,
|
||||
|
||||
pub io_buffer_alignment: usize,
|
||||
}
|
||||
|
||||
/// Token for authentication to safekeepers
|
||||
@@ -323,10 +325,11 @@ impl PageServerConf {
|
||||
image_compression,
|
||||
ephemeral_bytes_per_memory_kb,
|
||||
l0_flush,
|
||||
virtual_file_io_mode,
|
||||
virtual_file_direct_io,
|
||||
concurrent_tenant_warmup,
|
||||
concurrent_tenant_size_logical_size_queries,
|
||||
virtual_file_io_engine,
|
||||
io_buffer_alignment,
|
||||
tenant_config,
|
||||
} = config_toml;
|
||||
|
||||
@@ -365,6 +368,8 @@ impl PageServerConf {
|
||||
max_vectored_read_bytes,
|
||||
image_compression,
|
||||
ephemeral_bytes_per_memory_kb,
|
||||
virtual_file_direct_io,
|
||||
io_buffer_alignment,
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// fields that require additional validation or custom handling
|
||||
@@ -403,7 +408,6 @@ impl PageServerConf {
|
||||
l0_flush: l0_flush
|
||||
.map(crate::l0_flush::L0FlushConfig::from)
|
||||
.unwrap_or_default(),
|
||||
virtual_file_io_mode: virtual_file_io_mode.unwrap_or(virtual_file::IoMode::preferred()),
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------
|
||||
|
||||
@@ -17,7 +17,6 @@ use hyper::header;
|
||||
use hyper::StatusCode;
|
||||
use hyper::{Body, Request, Response, Uri};
|
||||
use metrics::launch_timestamp::LaunchTimestamp;
|
||||
use pageserver_api::models::virtual_file::IoMode;
|
||||
use pageserver_api::models::AuxFilePolicy;
|
||||
use pageserver_api::models::DownloadRemoteLayersTaskSpawnRequest;
|
||||
use pageserver_api::models::IngestAuxFilesRequest;
|
||||
@@ -704,8 +703,6 @@ async fn timeline_archival_config_handler(
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);
|
||||
|
||||
let request_data: TimelineArchivalConfigRequest = json_request(&mut request).await?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
let state = get_state(&request);
|
||||
@@ -716,7 +713,7 @@ async fn timeline_archival_config_handler(
|
||||
.get_attached_tenant_shard(tenant_shard_id)?;
|
||||
|
||||
tenant
|
||||
.apply_timeline_archival_config(timeline_id, request_data.state, ctx)
|
||||
.apply_timeline_archival_config(timeline_id, request_data.state)
|
||||
.await?;
|
||||
Ok::<_, ApiError>(())
|
||||
}
|
||||
@@ -2382,13 +2379,17 @@ async fn put_io_engine_handler(
    json_response(StatusCode::OK, ())
}

async fn put_io_mode_handler(
async fn put_io_alignment_handler(
    mut r: Request<Body>,
    _cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
    check_permission(&r, None)?;
    let mode: IoMode = json_request(&mut r).await?;
    crate::virtual_file::set_io_mode(mode);
    let align: usize = json_request(&mut r).await?;
    crate::virtual_file::set_io_buffer_alignment(align).map_err(|align| {
        ApiError::PreconditionFailed(
            format!("Requested io alignment ({align}) is not a power of two").into(),
        )
    })?;
    json_response(StatusCode::OK, ())
}

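The handler above turns a non-power-of-two alignment into a 412 Precondition Failed response. A minimal sketch of that validation, using a hypothetical free function rather than the real `virtual_file::set_io_buffer_alignment`:

    /// Returns Err(align) for values that cannot serve as an IO buffer alignment,
    /// so the caller can build an ApiError::PreconditionFailed from it.
    fn validate_io_buffer_alignment(align: usize) -> Result<usize, usize> {
        if align.is_power_of_two() {
            Ok(align)
        } else {
            Err(align)
        }
    }

    fn main() {
        assert_eq!(validate_io_buffer_alignment(512), Ok(512));
        assert_eq!(validate_io_buffer_alignment(4096), Ok(4096));
        assert_eq!(validate_io_buffer_alignment(0), Err(0)); // zero is not a power of two
        assert_eq!(validate_io_buffer_alignment(3000), Err(3000));
    }

A client exercises the new endpoint with a PUT of a JSON number to /v1/io_alignment, mirroring the existing /v1/io_engine route registered in the router hunk further down.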
@@ -3079,7 +3080,9 @@ pub fn make_router(
|
||||
|r| api_handler(r, timeline_collect_keyspace),
|
||||
)
|
||||
.put("/v1/io_engine", |r| api_handler(r, put_io_engine_handler))
|
||||
.put("/v1/io_mode", |r| api_handler(r, put_io_mode_handler))
|
||||
.put("/v1/io_alignment", |r| {
|
||||
api_handler(r, put_io_alignment_handler)
|
||||
})
|
||||
.put(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/force_aux_policy_switch",
|
||||
|r| api_handler(r, force_aux_policy_switch_handler),
|
||||
|
||||
@@ -13,8 +13,6 @@ pub mod http;
|
||||
pub mod import_datadir;
|
||||
pub mod l0_flush;
|
||||
|
||||
extern crate hyper0 as hyper;
|
||||
|
||||
use futures::{stream::FuturesUnordered, StreamExt};
|
||||
pub use pageserver_api::keyspace;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
@@ -38,7 +38,6 @@ use std::future::Future;
|
||||
use std::sync::Weak;
|
||||
use std::time::SystemTime;
|
||||
use storage_broker::BrokerClientChannel;
|
||||
use timeline::offload::offload_timeline;
|
||||
use tokio::io::BufReader;
|
||||
use tokio::sync::watch;
|
||||
use tokio::task::JoinSet;
|
||||
@@ -288,13 +287,9 @@ pub struct Tenant {
|
||||
|
||||
/// During timeline creation, we first insert the TimelineId to the
|
||||
/// creating map, then `timelines`, then remove it from the creating map.
|
||||
/// **Lock order**: if acquiring both, acquire`timelines` before `timelines_creating`
|
||||
/// **Lock order**: if acquring both, acquire`timelines` before `timelines_creating`
|
||||
timelines_creating: std::sync::Mutex<HashSet<TimelineId>>,
|
||||
|
||||
/// Possibly offloaded and archived timelines
|
||||
/// **Lock order**: if acquiring both, acquire`timelines` before `timelines_offloaded`
|
||||
timelines_offloaded: Mutex<HashMap<TimelineId, Arc<OffloadedTimeline>>>,
|
||||
|
||||
// This mutex prevents creation of new timelines during GC.
|
||||
// Adding yet another mutex (in addition to `timelines`) is needed because holding
|
||||
// `timelines` mutex during all GC iteration
|
||||
@@ -489,65 +484,6 @@ impl WalRedoManager {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct OffloadedTimeline {
|
||||
pub tenant_shard_id: TenantShardId,
|
||||
pub timeline_id: TimelineId,
|
||||
pub ancestor_timeline_id: Option<TimelineId>,
|
||||
|
||||
// TODO: once we persist offloaded state, make this lazily constructed
|
||||
pub remote_client: Arc<RemoteTimelineClient>,
|
||||
|
||||
/// Prevent two tasks from deleting the timeline at the same time. If held, the
|
||||
/// timeline is being deleted. If 'true', the timeline has already been deleted.
|
||||
pub delete_progress: Arc<tokio::sync::Mutex<DeleteTimelineFlow>>,
|
||||
}
|
||||
|
||||
impl OffloadedTimeline {
|
||||
fn from_timeline(timeline: &Timeline) -> Self {
|
||||
Self {
|
||||
tenant_shard_id: timeline.tenant_shard_id,
|
||||
timeline_id: timeline.timeline_id,
|
||||
ancestor_timeline_id: timeline.get_ancestor_timeline_id(),
|
||||
|
||||
remote_client: timeline.remote_client.clone(),
|
||||
delete_progress: timeline.delete_progress.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum TimelineOrOffloaded {
|
||||
Timeline(Arc<Timeline>),
|
||||
Offloaded(Arc<OffloadedTimeline>),
|
||||
}
|
||||
|
||||
impl TimelineOrOffloaded {
|
||||
pub fn tenant_shard_id(&self) -> TenantShardId {
|
||||
match self {
|
||||
TimelineOrOffloaded::Timeline(timeline) => timeline.tenant_shard_id,
|
||||
TimelineOrOffloaded::Offloaded(offloaded) => offloaded.tenant_shard_id,
|
||||
}
|
||||
}
|
||||
pub fn timeline_id(&self) -> TimelineId {
|
||||
match self {
|
||||
TimelineOrOffloaded::Timeline(timeline) => timeline.timeline_id,
|
||||
TimelineOrOffloaded::Offloaded(offloaded) => offloaded.timeline_id,
|
||||
}
|
||||
}
|
||||
pub fn delete_progress(&self) -> &Arc<tokio::sync::Mutex<DeleteTimelineFlow>> {
|
||||
match self {
|
||||
TimelineOrOffloaded::Timeline(timeline) => &timeline.delete_progress,
|
||||
TimelineOrOffloaded::Offloaded(offloaded) => &offloaded.delete_progress,
|
||||
}
|
||||
}
|
||||
pub fn remote_client(&self) -> &Arc<RemoteTimelineClient> {
|
||||
match self {
|
||||
TimelineOrOffloaded::Timeline(timeline) => &timeline.remote_client,
|
||||
TimelineOrOffloaded::Offloaded(offloaded) => &offloaded.remote_client,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error, PartialEq, Eq)]
|
||||
pub enum GetTimelineError {
|
||||
#[error("Timeline is shutting down")]
|
||||
@@ -1470,192 +1406,52 @@ impl Tenant {
|
||||
}
|
||||
}
|
||||
|
||||
fn check_to_be_archived_has_no_unarchived_children(
|
||||
timeline_id: TimelineId,
|
||||
timelines: &std::sync::MutexGuard<'_, HashMap<TimelineId, Arc<Timeline>>>,
|
||||
) -> Result<(), TimelineArchivalError> {
|
||||
let children: Vec<TimelineId> = timelines
|
||||
.iter()
|
||||
.filter_map(|(id, entry)| {
|
||||
if entry.get_ancestor_timeline_id() != Some(timeline_id) {
|
||||
return None;
|
||||
}
|
||||
if entry.is_archived() == Some(true) {
|
||||
return None;
|
||||
}
|
||||
Some(*id)
|
||||
})
|
||||
.collect();
|
||||
|
||||
if !children.is_empty() {
|
||||
return Err(TimelineArchivalError::HasUnarchivedChildren(children));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn check_ancestor_of_to_be_unarchived_is_not_archived(
|
||||
ancestor_timeline_id: TimelineId,
|
||||
timelines: &std::sync::MutexGuard<'_, HashMap<TimelineId, Arc<Timeline>>>,
|
||||
offloaded_timelines: &std::sync::MutexGuard<
|
||||
'_,
|
||||
HashMap<TimelineId, Arc<OffloadedTimeline>>,
|
||||
>,
|
||||
) -> Result<(), TimelineArchivalError> {
|
||||
let has_archived_parent =
|
||||
if let Some(ancestor_timeline) = timelines.get(&ancestor_timeline_id) {
|
||||
ancestor_timeline.is_archived() == Some(true)
|
||||
} else if offloaded_timelines.contains_key(&ancestor_timeline_id) {
|
||||
true
|
||||
} else {
|
||||
error!("ancestor timeline {ancestor_timeline_id} not found");
|
||||
if cfg!(debug_assertions) {
|
||||
panic!("ancestor timeline {ancestor_timeline_id} not found");
|
||||
}
|
||||
return Err(TimelineArchivalError::NotFound);
|
||||
};
|
||||
if has_archived_parent {
|
||||
return Err(TimelineArchivalError::HasArchivedParent(
|
||||
ancestor_timeline_id,
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn check_to_be_unarchived_timeline_has_no_archived_parent(
|
||||
timeline: &Arc<Timeline>,
|
||||
) -> Result<(), TimelineArchivalError> {
|
||||
if let Some(ancestor_timeline) = timeline.ancestor_timeline() {
|
||||
if ancestor_timeline.is_archived() == Some(true) {
|
||||
return Err(TimelineArchivalError::HasArchivedParent(
|
||||
ancestor_timeline.timeline_id,
|
||||
));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Loads the specified (offloaded) timeline from S3 and attaches it as a loaded timeline
|
||||
async fn unoffload_timeline(
|
||||
self: &Arc<Self>,
|
||||
timeline_id: TimelineId,
|
||||
ctx: RequestContext,
|
||||
) -> Result<Arc<Timeline>, TimelineArchivalError> {
|
||||
let cancel = self.cancel.clone();
|
||||
let timeline_preload = self
|
||||
.load_timeline_metadata(timeline_id, self.remote_storage.clone(), cancel)
|
||||
.await;
|
||||
|
||||
let index_part = match timeline_preload.index_part {
|
||||
Ok(index_part) => {
|
||||
debug!("remote index part exists for timeline {timeline_id}");
|
||||
index_part
|
||||
}
|
||||
Err(DownloadError::NotFound) => {
|
||||
error!(%timeline_id, "index_part not found on remote");
|
||||
return Err(TimelineArchivalError::NotFound);
|
||||
}
|
||||
Err(e) => {
|
||||
// Some (possibly ephemeral) error happened during index_part download.
|
||||
warn!(%timeline_id, "Failed to load index_part from remote storage, failed creation? ({e})");
|
||||
return Err(TimelineArchivalError::Other(
|
||||
anyhow::Error::new(e).context("downloading index_part from remote storage"),
|
||||
));
|
||||
}
|
||||
};
|
||||
let index_part = match index_part {
|
||||
MaybeDeletedIndexPart::IndexPart(index_part) => index_part,
|
||||
MaybeDeletedIndexPart::Deleted(_index_part) => {
|
||||
info!("timeline is deleted according to index_part.json");
|
||||
return Err(TimelineArchivalError::NotFound);
|
||||
}
|
||||
};
|
||||
let remote_metadata = index_part.metadata.clone();
|
||||
let timeline_resources = self.build_timeline_resources(timeline_id);
|
||||
self.load_remote_timeline(
|
||||
timeline_id,
|
||||
index_part,
|
||||
remote_metadata,
|
||||
timeline_resources,
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"failed to load remote timeline {} for tenant {}",
|
||||
timeline_id, self.tenant_shard_id
|
||||
)
|
||||
})?;
|
||||
let timelines = self.timelines.lock().unwrap();
|
||||
if let Some(timeline) = timelines.get(&timeline_id) {
|
||||
let mut offloaded_timelines = self.timelines_offloaded.lock().unwrap();
|
||||
if offloaded_timelines.remove(&timeline_id).is_none() {
|
||||
warn!("timeline already removed from offloaded timelines");
|
||||
}
|
||||
Ok(Arc::clone(timeline))
|
||||
} else {
|
||||
warn!("timeline not available directly after attach");
|
||||
Err(TimelineArchivalError::Other(anyhow::anyhow!(
|
||||
"timeline not available directly after attach"
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn apply_timeline_archival_config(
|
||||
self: &Arc<Self>,
|
||||
&self,
|
||||
timeline_id: TimelineId,
|
||||
new_state: TimelineArchivalState,
|
||||
ctx: RequestContext,
|
||||
state: TimelineArchivalState,
|
||||
) -> Result<(), TimelineArchivalError> {
|
||||
info!("setting timeline archival config");
|
||||
// First part: figure out what is needed to do, and do validation
|
||||
let timeline_or_unarchive_offloaded = 'outer: {
|
||||
let timeline = {
|
||||
let timelines = self.timelines.lock().unwrap();
|
||||
|
||||
let Some(timeline) = timelines.get(&timeline_id) else {
|
||||
let offloaded_timelines = self.timelines_offloaded.lock().unwrap();
|
||||
let Some(offloaded) = offloaded_timelines.get(&timeline_id) else {
|
||||
return Err(TimelineArchivalError::NotFound);
|
||||
};
|
||||
if new_state == TimelineArchivalState::Archived {
|
||||
// It's offloaded already, so nothing to do
|
||||
return Ok(());
|
||||
}
|
||||
if let Some(ancestor_timeline_id) = offloaded.ancestor_timeline_id {
|
||||
Self::check_ancestor_of_to_be_unarchived_is_not_archived(
|
||||
ancestor_timeline_id,
|
||||
&timelines,
|
||||
&offloaded_timelines,
|
||||
)?;
|
||||
}
|
||||
break 'outer None;
|
||||
return Err(TimelineArchivalError::NotFound);
|
||||
};
|
||||
|
||||
// Do some validation. We release the timelines lock below, so there is potential
|
||||
// for race conditions: these checks are more present to prevent misunderstandings of
|
||||
// the API's capabilities, instead of serving as the sole way to defend their invariants.
|
||||
match new_state {
|
||||
TimelineArchivalState::Unarchived => {
|
||||
Self::check_to_be_unarchived_timeline_has_no_archived_parent(timeline)?
|
||||
}
|
||||
TimelineArchivalState::Archived => {
|
||||
Self::check_to_be_archived_has_no_unarchived_children(timeline_id, &timelines)?
|
||||
if state == TimelineArchivalState::Unarchived {
|
||||
if let Some(ancestor_timeline) = timeline.ancestor_timeline() {
|
||||
if ancestor_timeline.is_archived() == Some(true) {
|
||||
return Err(TimelineArchivalError::HasArchivedParent(
|
||||
ancestor_timeline.timeline_id,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(Arc::clone(timeline))
|
||||
|
||||
// Ensure that there are no non-archived child timelines
|
||||
let children: Vec<TimelineId> = timelines
|
||||
.iter()
|
||||
.filter_map(|(id, entry)| {
|
||||
if entry.get_ancestor_timeline_id() != Some(timeline_id) {
|
||||
return None;
|
||||
}
|
||||
if entry.is_archived() == Some(true) {
|
||||
return None;
|
||||
}
|
||||
Some(*id)
|
||||
})
|
||||
.collect();
|
||||
|
||||
if !children.is_empty() && state == TimelineArchivalState::Archived {
|
||||
return Err(TimelineArchivalError::HasUnarchivedChildren(children));
|
||||
}
|
||||
Arc::clone(timeline)
|
||||
};
|
||||
|
||||
// Second part: unarchive timeline (if needed)
|
||||
let timeline = if let Some(timeline) = timeline_or_unarchive_offloaded {
|
||||
timeline
|
||||
} else {
|
||||
// Turn offloaded timeline into a non-offloaded one
|
||||
self.unoffload_timeline(timeline_id, ctx).await?
|
||||
};
|
||||
|
||||
// Third part: upload new timeline archival state and block until it is present in S3
|
||||
let upload_needed = timeline
|
||||
.remote_client
|
||||
.schedule_index_upload_for_timeline_archival_state(new_state)?;
|
||||
.schedule_index_upload_for_timeline_archival_state(state)?;
|
||||
|
||||
if upload_needed {
|
||||
info!("Uploading new state");
|
||||
@@ -2088,7 +1884,7 @@ impl Tenant {
|
||||
///
|
||||
/// Returns whether we have pending compaction task.
|
||||
async fn compaction_iteration(
|
||||
self: &Arc<Self>,
|
||||
&self,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<bool, timeline::CompactionError> {
|
||||
@@ -2109,28 +1905,21 @@ impl Tenant {
|
||||
// while holding the lock. Then drop the lock and actually perform the
|
||||
// compactions. We don't want to block everything else while the
|
||||
// compaction runs.
|
||||
let timelines_to_compact_or_offload;
|
||||
{
|
||||
let timelines_to_compact = {
|
||||
let timelines = self.timelines.lock().unwrap();
|
||||
timelines_to_compact_or_offload = timelines
|
||||
let timelines_to_compact = timelines
|
||||
.iter()
|
||||
.filter_map(|(timeline_id, timeline)| {
|
||||
let (is_active, can_offload) = (timeline.is_active(), timeline.can_offload());
|
||||
let has_no_unoffloaded_children = {
|
||||
!timelines
|
||||
.iter()
|
||||
.any(|(_id, tl)| tl.get_ancestor_timeline_id() == Some(*timeline_id))
|
||||
};
|
||||
let can_offload = can_offload && has_no_unoffloaded_children;
|
||||
if (is_active, can_offload) == (false, false) {
|
||||
None
|
||||
if timeline.is_active() {
|
||||
Some((*timeline_id, timeline.clone()))
|
||||
} else {
|
||||
Some((*timeline_id, timeline.clone(), (is_active, can_offload)))
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
drop(timelines);
|
||||
}
|
||||
timelines_to_compact
|
||||
};
|
||||
|
||||
// Before doing any I/O work, check our circuit breaker
|
||||
if self.compaction_circuit_breaker.lock().unwrap().is_broken() {
|
||||
@@ -2140,34 +1929,20 @@ impl Tenant {
|
||||
|
||||
let mut has_pending_task = false;
|
||||
|
||||
for (timeline_id, timeline, (can_compact, can_offload)) in &timelines_to_compact_or_offload
|
||||
{
|
||||
let pending_task_left = if *can_compact {
|
||||
Some(
|
||||
timeline
|
||||
.compact(cancel, EnumSet::empty(), ctx)
|
||||
.instrument(info_span!("compact_timeline", %timeline_id))
|
||||
.await
|
||||
.inspect_err(|e| match e {
|
||||
timeline::CompactionError::ShuttingDown => (),
|
||||
timeline::CompactionError::Other(e) => {
|
||||
self.compaction_circuit_breaker
|
||||
.lock()
|
||||
.unwrap()
|
||||
.fail(&CIRCUIT_BREAKERS_BROKEN, e);
|
||||
}
|
||||
})?,
|
||||
)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
has_pending_task |= pending_task_left.unwrap_or(false);
|
||||
if pending_task_left == Some(false) && *can_offload {
|
||||
offload_timeline(self, timeline)
|
||||
.instrument(info_span!("offload_timeline", %timeline_id))
|
||||
.await
|
||||
.map_err(timeline::CompactionError::Other)?;
|
||||
}
|
||||
for (timeline_id, timeline) in &timelines_to_compact {
|
||||
has_pending_task |= timeline
|
||||
.compact(cancel, EnumSet::empty(), ctx)
|
||||
.instrument(info_span!("compact_timeline", %timeline_id))
|
||||
.await
|
||||
.inspect_err(|e| match e {
|
||||
timeline::CompactionError::ShuttingDown => (),
|
||||
timeline::CompactionError::Other(e) => {
|
||||
self.compaction_circuit_breaker
|
||||
.lock()
|
||||
.unwrap()
|
||||
.fail(&CIRCUIT_BREAKERS_BROKEN, e);
|
||||
}
|
||||
})?;
|
||||
}
|
||||
|
||||
self.compaction_circuit_breaker
|
||||
@@ -3077,7 +2852,6 @@ impl Tenant {
|
||||
constructed_at: Instant::now(),
|
||||
timelines: Mutex::new(HashMap::new()),
|
||||
timelines_creating: Mutex::new(HashSet::new()),
|
||||
timelines_offloaded: Mutex::new(HashMap::new()),
|
||||
gc_cs: tokio::sync::Mutex::new(()),
|
||||
walredo_mgr,
|
||||
remote_storage,
|
||||
|
||||
@@ -84,7 +84,7 @@ impl Drop for EphemeralFile {
|
||||
fn drop(&mut self) {
|
||||
// unlink the file
|
||||
// we are clear to do this, because we have entered a gate
|
||||
let path = self.buffered_writer.as_inner().as_inner().path();
|
||||
let path = &self.buffered_writer.as_inner().as_inner().path;
|
||||
let res = std::fs::remove_file(path);
|
||||
if let Err(e) = res {
|
||||
if e.kind() != std::io::ErrorKind::NotFound {
|
||||
@@ -356,7 +356,7 @@ mod tests {
|
||||
}
|
||||
|
||||
let file_contents =
|
||||
std::fs::read(file.buffered_writer.as_inner().as_inner().path()).unwrap();
|
||||
std::fs::read(&file.buffered_writer.as_inner().as_inner().path).unwrap();
|
||||
assert_eq!(file_contents, &content[0..cap]);
|
||||
|
||||
let buffer_contents = file.buffered_writer.inspect_buffer();
|
||||
@@ -392,7 +392,7 @@ mod tests {
|
||||
.buffered_writer
|
||||
.as_inner()
|
||||
.as_inner()
|
||||
.path()
|
||||
.path
|
||||
.metadata()
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
|
||||
@@ -141,14 +141,14 @@ impl GcBlock {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn before_delete(&self, timeline_id: &super::TimelineId) {
|
||||
pub(crate) fn before_delete(&self, timeline: &super::Timeline) {
|
||||
let unblocked = {
|
||||
let mut g = self.reasons.lock().unwrap();
|
||||
if g.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
g.remove(timeline_id);
|
||||
g.remove(&timeline.timeline_id);
|
||||
|
||||
BlockingReasons::clean_and_summarize(g).is_none()
|
||||
};
|
||||
|
||||
@@ -27,7 +27,7 @@ use crate::tenant::Generation;
|
||||
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
|
||||
use crate::virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile};
|
||||
use crate::TEMP_FILE_SUFFIX;
|
||||
use remote_storage::{DownloadError, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath};
|
||||
use remote_storage::{DownloadError, GenericRemoteStorage, ListingMode, RemotePath};
|
||||
use utils::crashsafe::path_with_suffix_extension;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::pausable_failpoint;
|
||||
@@ -153,9 +153,7 @@ async fn download_object<'a>(
|
||||
.with_context(|| format!("create a destination file for layer '{dst_path}'"))
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
let download = storage
|
||||
.download(src_path, &DownloadOpts::default(), cancel)
|
||||
.await?;
|
||||
let download = storage.download(src_path, cancel).await?;
|
||||
|
||||
pausable_failpoint!("before-downloading-layer-stream-pausable");
|
||||
|
||||
@@ -206,9 +204,7 @@ async fn download_object<'a>(
|
||||
.with_context(|| format!("create a destination file for layer '{dst_path}'"))
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
let mut download = storage
|
||||
.download(src_path, &DownloadOpts::default(), cancel)
|
||||
.await?;
|
||||
let mut download = storage.download(src_path, cancel).await?;
|
||||
|
||||
pausable_failpoint!("before-downloading-layer-stream-pausable");
|
||||
|
||||
@@ -348,9 +344,7 @@ async fn do_download_index_part(
|
||||
|
||||
let index_part_bytes = download_retry_forever(
|
||||
|| async {
|
||||
let download = storage
|
||||
.download(&remote_path, &DownloadOpts::default(), cancel)
|
||||
.await?;
|
||||
let download = storage.download(&remote_path, cancel).await?;
|
||||
|
||||
let mut bytes = Vec::new();
|
||||
|
||||
@@ -532,15 +526,10 @@ pub(crate) async fn download_initdb_tar_zst(
|
||||
.with_context(|| format!("tempfile creation {temp_path}"))
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
let download = match storage
|
||||
.download(&remote_path, &DownloadOpts::default(), cancel)
|
||||
.await
|
||||
{
|
||||
let download = match storage.download(&remote_path, cancel).await {
|
||||
Ok(dl) => dl,
|
||||
Err(DownloadError::NotFound) => {
|
||||
storage
|
||||
.download(&remote_preserved_path, &DownloadOpts::default(), cancel)
|
||||
.await?
|
||||
storage.download(&remote_preserved_path, cancel).await?
|
||||
}
|
||||
Err(other) => Err(other)?,
|
||||
};
|
||||
|
||||
@@ -49,7 +49,7 @@ use futures::Future;
|
||||
use metrics::UIntGauge;
|
||||
use pageserver_api::models::SecondaryProgress;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use remote_storage::{DownloadError, DownloadOpts, Etag, GenericRemoteStorage};
|
||||
use remote_storage::{DownloadError, Etag, GenericRemoteStorage};
|
||||
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{info_span, instrument, warn, Instrument};
|
||||
@@ -944,35 +944,36 @@ impl<'a> TenantDownloader<'a> {
    ) -> Result<HeatMapDownload, UpdateError> {
        debug_assert_current_span_has_tenant_id();
        let tenant_shard_id = self.secondary_state.get_tenant_shard_id();
        // TODO: pull up etag check into the request, to do a conditional GET rather than
        // issuing a GET and then maybe ignoring the response body
        // (https://github.com/neondatabase/neon/issues/6199)
        tracing::debug!("Downloading heatmap for secondary tenant",);

        let heatmap_path = remote_heatmap_path(tenant_shard_id);
        let cancel = &self.secondary_state.cancel;
        let opts = DownloadOpts {
            etag: prev_etag.cloned(),
            ..Default::default()
        };

        backoff::retry(
            || async {
                let download = match self
                let download = self
                    .remote_storage
                    .download(&heatmap_path, &opts, cancel)
                    .download(&heatmap_path, cancel)
                    .await
                {
                    Ok(download) => download,
                    Err(DownloadError::Unmodified) => return Ok(HeatMapDownload::Unmodified),
                    Err(err) => return Err(err.into()),
                };
                    .map_err(UpdateError::from)?;

                let mut heatmap_bytes = Vec::new();
                let mut body = tokio_util::io::StreamReader::new(download.download_stream);
                let _size = tokio::io::copy_buf(&mut body, &mut heatmap_bytes).await?;
                Ok(HeatMapDownload::Modified(HeatMapModified {
                    etag: download.etag,
                    last_modified: download.last_modified,
                    bytes: heatmap_bytes,
                }))
                SECONDARY_MODE.download_heatmap.inc();

                if Some(&download.etag) == prev_etag {
                    Ok(HeatMapDownload::Unmodified)
                } else {
                    let mut heatmap_bytes = Vec::new();
                    let mut body = tokio_util::io::StreamReader::new(download.download_stream);
                    let _size = tokio::io::copy_buf(&mut body, &mut heatmap_bytes).await?;
                    Ok(HeatMapDownload::Modified(HeatMapModified {
                        etag: download.etag,
                        last_modified: download.last_modified,
                        bytes: heatmap_bytes,
                    }))
                }
            },
            |e| matches!(e, UpdateError::NoData | UpdateError::Cancelled),
            FAILED_DOWNLOAD_WARN_THRESHOLD,
|
||||
.await
|
||||
.ok_or_else(|| UpdateError::Cancelled)
|
||||
.and_then(|x| x)
|
||||
.inspect(|_| SECONDARY_MODE.download_heatmap.inc())
|
||||
}
|
||||
|
||||
/// Download heatmap layers that are not present on local disk, or update their
|
||||
|
||||
@@ -53,7 +53,6 @@ use camino::{Utf8Path, Utf8PathBuf};
|
||||
use futures::StreamExt;
|
||||
use itertools::Itertools;
|
||||
use pageserver_api::config::MaxVectoredReadBytes;
|
||||
use pageserver_api::key::DBDIR_KEY;
|
||||
use pageserver_api::keyspace::KeySpace;
|
||||
use pageserver_api::models::ImageCompressionAlgorithm;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
@@ -573,7 +572,7 @@ impl DeltaLayerWriterInner {
|
||||
ensure!(
|
||||
metadata.len() <= S3_UPLOAD_LIMIT,
|
||||
"Created delta layer file at {} of size {} above limit {S3_UPLOAD_LIMIT}!",
|
||||
file.path(),
|
||||
file.path,
|
||||
metadata.len()
|
||||
);
|
||||
|
||||
@@ -791,7 +790,7 @@ impl DeltaLayerInner {
|
||||
max_vectored_read_bytes: Option<MaxVectoredReadBytes>,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Self> {
|
||||
let file = VirtualFile::open_v2(path, ctx)
|
||||
let file = VirtualFile::open(path, ctx)
|
||||
.await
|
||||
.context("open layer file")?;
|
||||
|
||||
@@ -964,25 +963,14 @@ impl DeltaLayerInner {
|
||||
.blobs_at
|
||||
.as_slice()
|
||||
.iter()
|
||||
.filter_map(|(_, blob_meta)| {
|
||||
if blob_meta.key.is_rel_dir_key() || blob_meta.key == DBDIR_KEY {
|
||||
// The size of values for these keys is unbounded and can
|
||||
// grow very large in pathological cases.
|
||||
None
|
||||
} else {
|
||||
Some(format!("{}@{}", blob_meta.key, blob_meta.lsn))
|
||||
}
|
||||
})
|
||||
.map(|(_, blob_meta)| format!("{}@{}", blob_meta.key, blob_meta.lsn))
|
||||
.join(", ");
|
||||
|
||||
if !offenders.is_empty() {
|
||||
tracing::warn!(
|
||||
"Oversized vectored read ({} > {}) for keys {}",
|
||||
largest_read_size,
|
||||
read_size_soft_max,
|
||||
offenders
|
||||
);
|
||||
}
|
||||
tracing::warn!(
|
||||
"Oversized vectored read ({} > {}) for keys {}",
|
||||
largest_read_size,
|
||||
read_size_soft_max,
|
||||
offenders
|
||||
);
|
||||
}
|
||||
|
||||
largest_read_size
|
||||
@@ -1022,7 +1010,7 @@ impl DeltaLayerInner {
|
||||
blob_meta.key,
|
||||
PageReconstructError::Other(anyhow!(
|
||||
"Failed to read blobs from virtual file {}: {}",
|
||||
self.file.path(),
|
||||
self.file.path,
|
||||
kind
|
||||
)),
|
||||
);
|
||||
@@ -1048,7 +1036,7 @@ impl DeltaLayerInner {
|
||||
meta.meta.key,
|
||||
PageReconstructError::Other(anyhow!(e).context(format!(
|
||||
"Failed to decompress blob from virtual file {}",
|
||||
self.file.path(),
|
||||
self.file.path,
|
||||
))),
|
||||
);
|
||||
|
||||
@@ -1066,7 +1054,7 @@ impl DeltaLayerInner {
|
||||
meta.meta.key,
|
||||
PageReconstructError::Other(anyhow!(e).context(format!(
|
||||
"Failed to deserialize blob from virtual file {}",
|
||||
self.file.path(),
|
||||
self.file.path,
|
||||
))),
|
||||
);
|
||||
|
||||
@@ -1198,6 +1186,7 @@ impl DeltaLayerInner {
|
||||
let mut prev: Option<(Key, Lsn, BlobRef)> = None;
|
||||
|
||||
let mut read_builder: Option<ChunkedVectoredReadBuilder> = None;
|
||||
let align = virtual_file::get_io_buffer_alignment();
|
||||
|
||||
let max_read_size = self
|
||||
.max_vectored_read_bytes
|
||||
@@ -1246,6 +1235,7 @@ impl DeltaLayerInner {
|
||||
offsets.end.pos(),
|
||||
meta,
|
||||
max_read_size,
|
||||
align,
|
||||
))
|
||||
}
|
||||
} else {
|
||||
|
||||
@@ -49,7 +49,6 @@ use camino::{Utf8Path, Utf8PathBuf};
|
||||
use hex;
|
||||
use itertools::Itertools;
|
||||
use pageserver_api::config::MaxVectoredReadBytes;
|
||||
use pageserver_api::key::DBDIR_KEY;
|
||||
use pageserver_api::keyspace::KeySpace;
|
||||
use pageserver_api::shard::{ShardIdentity, TenantShardId};
|
||||
use rand::{distributions::Alphanumeric, Rng};
|
||||
@@ -389,7 +388,7 @@ impl ImageLayerInner {
|
||||
max_vectored_read_bytes: Option<MaxVectoredReadBytes>,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Self> {
|
||||
let file = VirtualFile::open_v2(path, ctx)
|
||||
let file = VirtualFile::open(path, ctx)
|
||||
.await
|
||||
.context("open layer file")?;
|
||||
let file_id = page_cache::next_file_id();
|
||||
@@ -588,25 +587,14 @@ impl ImageLayerInner {
|
||||
.blobs_at
|
||||
.as_slice()
|
||||
.iter()
|
||||
.filter_map(|(_, blob_meta)| {
|
||||
if blob_meta.key.is_rel_dir_key() || blob_meta.key == DBDIR_KEY {
|
||||
// The size of values for these keys is unbounded and can
|
||||
// grow very large in pathological cases.
|
||||
None
|
||||
} else {
|
||||
Some(format!("{}@{}", blob_meta.key, blob_meta.lsn))
|
||||
}
|
||||
})
|
||||
.map(|(_, blob_meta)| format!("{}@{}", blob_meta.key, blob_meta.lsn))
|
||||
.join(", ");
|
||||
|
||||
if !offenders.is_empty() {
|
||||
tracing::warn!(
|
||||
"Oversized vectored read ({} > {}) for keys {}",
|
||||
buf_size,
|
||||
max_vectored_read_bytes,
|
||||
offenders
|
||||
);
|
||||
}
|
||||
tracing::warn!(
|
||||
"Oversized vectored read ({} > {}) for keys {}",
|
||||
buf_size,
|
||||
max_vectored_read_bytes,
|
||||
offenders
|
||||
);
|
||||
}
|
||||
|
||||
let buf = BytesMut::with_capacity(buf_size);
|
||||
@@ -626,7 +614,7 @@ impl ImageLayerInner {
|
||||
meta.meta.key,
|
||||
PageReconstructError::Other(anyhow!(e).context(format!(
|
||||
"Failed to decompress blob from virtual file {}",
|
||||
self.file.path(),
|
||||
self.file.path,
|
||||
))),
|
||||
);
|
||||
|
||||
@@ -647,7 +635,7 @@ impl ImageLayerInner {
|
||||
blob_meta.key,
|
||||
PageReconstructError::from(anyhow!(
|
||||
"Failed to read blobs from virtual file {}: {}",
|
||||
self.file.path(),
|
||||
self.file.path,
|
||||
kind
|
||||
)),
|
||||
);
|
||||
|
||||
@@ -7,7 +7,6 @@ pub(crate) mod handle;
|
||||
mod init;
|
||||
pub mod layer_manager;
|
||||
pub(crate) mod logical_size;
|
||||
pub mod offload;
|
||||
pub mod span;
|
||||
pub mod uninit;
|
||||
mod walreceiver;
|
||||
@@ -1557,17 +1556,6 @@ impl Timeline {
        }
    }

    /// Checks if the internal state of the timeline is consistent with it being able to be offloaded.
    /// This is neccessary but not sufficient for offloading of the timeline as it might have
    /// child timelines that are not offloaded yet.
    pub(crate) fn can_offload(&self) -> bool {
        if self.remote_client.is_archived() != Some(true) {
            return false;
        }

        true
    }

    /// Outermost timeline compaction operation; downloads needed layers. Returns whether we have pending
    /// compaction tasks.
    pub(crate) async fn compact(
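As the doc comment notes, `can_offload` is necessary but not sufficient: the compaction hunk earlier additionally requires that no attached timeline still names this one as an ancestor. A small sketch of that combined check, with hypothetical simplified types standing in for the real `Timeline` map:

    use std::collections::HashMap;

    // Hypothetical stand-in: only the two facts the offload decision needs
    // (archived or not, and who the ancestor is) are modelled here.
    struct TimelineInfo {
        archived: bool,
        ancestor: Option<u64>,
    }

    /// A timeline may be offloaded only if it is archived *and* no timeline still
    /// attached to the tenant has it as an ancestor.
    fn can_offload(id: u64, attached: &HashMap<u64, TimelineInfo>) -> bool {
        let Some(tl) = attached.get(&id) else { return false };
        tl.archived && !attached.values().any(|other| other.ancestor == Some(id))
    }

    fn main() {
        let mut attached = HashMap::new();
        attached.insert(1, TimelineInfo { archived: true, ancestor: None });
        attached.insert(2, TimelineInfo { archived: false, ancestor: Some(1) });
        assert!(!can_offload(1, &attached)); // an attached child timeline blocks offload
        attached.remove(&2);
        assert!(can_offload(1, &attached));
        assert!(!can_offload(2, &attached)); // ids no longer attached are not re-offloaded
    }
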
@@ -1830,6 +1818,7 @@ impl Timeline {
|
||||
self.current_state() == TimelineState::Active
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub(crate) fn is_archived(&self) -> Option<bool> {
|
||||
self.remote_client.is_archived()
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@ use crate::{
|
||||
tenant::{
|
||||
metadata::TimelineMetadata,
|
||||
remote_timeline_client::{PersistIndexPartWithDeletedFlagError, RemoteTimelineClient},
|
||||
CreateTimelineCause, DeleteTimelineError, Tenant, TimelineOrOffloaded,
|
||||
CreateTimelineCause, DeleteTimelineError, Tenant,
|
||||
},
|
||||
};
|
||||
|
||||
@@ -24,14 +24,12 @@ use super::{Timeline, TimelineResources};
|
||||
/// Mark timeline as deleted in S3 so we won't pick it up next time
|
||||
/// during attach or pageserver restart.
|
||||
/// See comment in persist_index_part_with_deleted_flag.
|
||||
async fn set_deleted_in_remote_index(
|
||||
timeline: &TimelineOrOffloaded,
|
||||
) -> Result<(), DeleteTimelineError> {
|
||||
let res = timeline
|
||||
.remote_client()
|
||||
async fn set_deleted_in_remote_index(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
|
||||
match timeline
|
||||
.remote_client
|
||||
.persist_index_part_with_deleted_flag()
|
||||
.await;
|
||||
match res {
|
||||
.await
|
||||
{
|
||||
// If we (now, or already) marked it successfully as deleted, we can proceed
|
||||
Ok(()) | Err(PersistIndexPartWithDeletedFlagError::AlreadyDeleted(_)) => (),
|
||||
// Bail out otherwise
|
||||
@@ -129,9 +127,9 @@ pub(super) async fn delete_local_timeline_directory(
|
||||
}
|
||||
|
||||
/// Removes remote layers and an index file after them.
|
||||
async fn delete_remote_layers_and_index(timeline: &TimelineOrOffloaded) -> anyhow::Result<()> {
|
||||
async fn delete_remote_layers_and_index(timeline: &Timeline) -> anyhow::Result<()> {
|
||||
timeline
|
||||
.remote_client()
|
||||
.remote_client
|
||||
.delete_all()
|
||||
.await
|
||||
.context("delete_all")
|
||||
@@ -139,41 +137,27 @@ async fn delete_remote_layers_and_index(timeline: &TimelineOrOffloaded) -> anyho
|
||||
|
||||
/// It is important that this gets called when DeletionGuard is being held.
|
||||
/// For more context see comments in [`DeleteTimelineFlow::prepare`]
|
||||
async fn remove_maybe_offloaded_timeline_from_tenant(
|
||||
async fn remove_timeline_from_tenant(
|
||||
tenant: &Tenant,
|
||||
timeline: &TimelineOrOffloaded,
|
||||
timeline: &Timeline,
|
||||
_: &DeletionGuard, // using it as a witness
|
||||
) -> anyhow::Result<()> {
|
||||
// Remove the timeline from the map.
|
||||
// This observes the locking order between timelines and timelines_offloaded
|
||||
let mut timelines = tenant.timelines.lock().unwrap();
|
||||
let mut timelines_offloaded = tenant.timelines_offloaded.lock().unwrap();
|
||||
let offloaded_children_exist = timelines_offloaded
|
||||
.iter()
|
||||
.any(|(_, entry)| entry.ancestor_timeline_id == Some(timeline.timeline_id()));
|
||||
let children_exist = timelines
|
||||
.iter()
|
||||
.any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id()));
|
||||
// XXX this can happen because of race conditions with branch creation.
|
||||
// We already deleted the remote layer files, so it's probably best to panic.
|
||||
if children_exist || offloaded_children_exist {
|
||||
.any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id));
|
||||
// XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`.
|
||||
// We already deleted the layer files, so it's probably best to panic.
|
||||
// (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart)
|
||||
if children_exist {
|
||||
panic!("Timeline grew children while we removed layer files");
|
||||
}
|
||||
|
||||
match timeline {
|
||||
TimelineOrOffloaded::Timeline(timeline) => {
|
||||
timelines.remove(&timeline.timeline_id).expect(
|
||||
"timeline that we were deleting was concurrently removed from 'timelines' map",
|
||||
);
|
||||
}
|
||||
TimelineOrOffloaded::Offloaded(timeline) => {
|
||||
timelines_offloaded
|
||||
.remove(&timeline.timeline_id)
|
||||
.expect("timeline that we were deleting was concurrently removed from 'timelines_offloaded' map");
|
||||
}
|
||||
}
|
||||
timelines
|
||||
.remove(&timeline.timeline_id)
|
||||
.expect("timeline that we were deleting was concurrently removed from 'timelines' map");
|
||||
|
||||
drop(timelines_offloaded);
|
||||
drop(timelines);
|
||||
|
||||
Ok(())
|
||||
@@ -223,11 +207,9 @@ impl DeleteTimelineFlow {
|
||||
guard.mark_in_progress()?;
|
||||
|
||||
// Now that the Timeline is in Stopping state, request all the related tasks to shut down.
|
||||
if let TimelineOrOffloaded::Timeline(timeline) = &timeline {
|
||||
timeline.shutdown(super::ShutdownMode::Hard).await;
|
||||
}
|
||||
timeline.shutdown(super::ShutdownMode::Hard).await;
|
||||
|
||||
tenant.gc_block.before_delete(&timeline.timeline_id());
|
||||
tenant.gc_block.before_delete(&timeline);
|
||||
|
||||
fail::fail_point!("timeline-delete-before-index-deleted-at", |_| {
|
||||
Err(anyhow::anyhow!(
|
||||
@@ -303,16 +285,15 @@ impl DeleteTimelineFlow {
|
||||
|
||||
guard.mark_in_progress()?;
|
||||
|
||||
let timeline = TimelineOrOffloaded::Timeline(timeline);
|
||||
Self::schedule_background(guard, tenant.conf, tenant, timeline);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(super) fn prepare(
|
||||
fn prepare(
|
||||
tenant: &Tenant,
|
||||
timeline_id: TimelineId,
|
||||
) -> Result<(TimelineOrOffloaded, DeletionGuard), DeleteTimelineError> {
|
||||
) -> Result<(Arc<Timeline>, DeletionGuard), DeleteTimelineError> {
|
||||
// Note the interaction between this guard and deletion guard.
|
||||
// Here we attempt to lock deletion guard when we're holding a lock on timelines.
|
||||
// This is important because when you take into account `remove_timeline_from_tenant`
|
||||
@@ -326,14 +307,8 @@ impl DeleteTimelineFlow {
|
||||
let timelines = tenant.timelines.lock().unwrap();
|
||||
|
||||
let timeline = match timelines.get(&timeline_id) {
|
||||
Some(t) => TimelineOrOffloaded::Timeline(Arc::clone(t)),
|
||||
None => {
|
||||
let offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();
|
||||
match offloaded_timelines.get(&timeline_id) {
|
||||
Some(t) => TimelineOrOffloaded::Offloaded(Arc::clone(t)),
|
||||
None => return Err(DeleteTimelineError::NotFound),
|
||||
}
|
||||
}
|
||||
Some(t) => t,
|
||||
None => return Err(DeleteTimelineError::NotFound),
|
||||
};
|
||||
|
||||
// Ensure that there are no child timelines **attached to that pageserver**,
|
||||
@@ -359,32 +334,30 @@ impl DeleteTimelineFlow {
|
||||
// to remove the timeline from it.
|
||||
// Always if you have two locks that are taken in different order this can result in a deadlock.
|
||||
|
||||
let delete_progress = Arc::clone(timeline.delete_progress());
|
||||
let delete_progress = Arc::clone(&timeline.delete_progress);
|
||||
let delete_lock_guard = match delete_progress.try_lock_owned() {
|
||||
Ok(guard) => DeletionGuard(guard),
|
||||
Err(_) => {
|
||||
// Unfortunately if lock fails arc is consumed.
|
||||
return Err(DeleteTimelineError::AlreadyInProgress(Arc::clone(
|
||||
timeline.delete_progress(),
|
||||
&timeline.delete_progress,
|
||||
)));
|
||||
}
|
||||
};
|
||||
|
||||
if let TimelineOrOffloaded::Timeline(timeline) = &timeline {
|
||||
timeline.set_state(TimelineState::Stopping);
|
||||
}
|
||||
timeline.set_state(TimelineState::Stopping);
|
||||
|
||||
Ok((timeline, delete_lock_guard))
|
||||
Ok((Arc::clone(timeline), delete_lock_guard))
|
||||
}
|
||||
|
||||
fn schedule_background(
|
||||
guard: DeletionGuard,
|
||||
conf: &'static PageServerConf,
|
||||
tenant: Arc<Tenant>,
|
||||
timeline: TimelineOrOffloaded,
|
||||
timeline: Arc<Timeline>,
|
||||
) {
|
||||
let tenant_shard_id = timeline.tenant_shard_id();
|
||||
let timeline_id = timeline.timeline_id();
|
||||
let tenant_shard_id = timeline.tenant_shard_id;
|
||||
let timeline_id = timeline.timeline_id;
|
||||
|
||||
task_mgr::spawn(
|
||||
task_mgr::BACKGROUND_RUNTIME.handle(),
|
||||
@@ -395,9 +368,7 @@ impl DeleteTimelineFlow {
|
||||
async move {
|
||||
if let Err(err) = Self::background(guard, conf, &tenant, &timeline).await {
|
||||
error!("Error: {err:#}");
|
||||
if let TimelineOrOffloaded::Timeline(timeline) = timeline {
|
||||
timeline.set_broken(format!("{err:#}"))
|
||||
}
|
||||
timeline.set_broken(format!("{err:#}"))
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
@@ -409,19 +380,15 @@ impl DeleteTimelineFlow {
|
||||
mut guard: DeletionGuard,
|
||||
conf: &PageServerConf,
|
||||
tenant: &Tenant,
|
||||
timeline: &TimelineOrOffloaded,
|
||||
timeline: &Timeline,
|
||||
) -> Result<(), DeleteTimelineError> {
|
||||
// Offloaded timelines have no local state
|
||||
// TODO: once we persist offloaded information, delete the timeline from there, too
|
||||
if let TimelineOrOffloaded::Timeline(timeline) = timeline {
|
||||
delete_local_timeline_directory(conf, tenant.tenant_shard_id, timeline).await?;
|
||||
}
|
||||
delete_local_timeline_directory(conf, tenant.tenant_shard_id, timeline).await?;
|
||||
|
||||
delete_remote_layers_and_index(timeline).await?;
|
||||
|
||||
pausable_failpoint!("in_progress_delete");
|
||||
|
||||
remove_maybe_offloaded_timeline_from_tenant(tenant, timeline, &guard).await?;
|
||||
remove_timeline_from_tenant(tenant, timeline, &guard).await?;
|
||||
|
||||
*guard = Self::Finished;
|
||||
|
||||
@@ -433,7 +400,7 @@ impl DeleteTimelineFlow {
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) struct DeletionGuard(OwnedMutexGuard<DeleteTimelineFlow>);
|
||||
struct DeletionGuard(OwnedMutexGuard<DeleteTimelineFlow>);
|
||||
|
||||
impl Deref for DeletionGuard {
|
||||
type Target = DeleteTimelineFlow;
|
||||
|
||||
@@ -1,69 +0,0 @@
use std::sync::Arc;

use crate::tenant::{OffloadedTimeline, Tenant, TimelineOrOffloaded};

use super::{
    delete::{delete_local_timeline_directory, DeleteTimelineFlow, DeletionGuard},
    Timeline,
};

pub(crate) async fn offload_timeline(
    tenant: &Tenant,
    timeline: &Arc<Timeline>,
) -> anyhow::Result<()> {
    tracing::info!("offloading archived timeline");
    let (timeline, guard) = DeleteTimelineFlow::prepare(tenant, timeline.timeline_id)?;

    let TimelineOrOffloaded::Timeline(timeline) = timeline else {
        tracing::error!("timeline already offloaded, but given timeline object");
        return Ok(());
    };

    // TODO extend guard mechanism above with method
    // to make deletions possible while offloading is in progress

    // TODO mark timeline as offloaded in S3

    let conf = &tenant.conf;
    delete_local_timeline_directory(conf, tenant.tenant_shard_id, &timeline).await?;

    remove_timeline_from_tenant(tenant, &timeline, &guard).await?;

    {
        let mut offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();
        offloaded_timelines.insert(
            timeline.timeline_id,
            Arc::new(OffloadedTimeline::from_timeline(&timeline)),
        );
    }

    Ok(())
}

/// It is important that this gets called when DeletionGuard is being held.
/// For more context see comments in [`DeleteTimelineFlow::prepare`]
async fn remove_timeline_from_tenant(
    tenant: &Tenant,
    timeline: &Timeline,
    _: &DeletionGuard, // using it as a witness
) -> anyhow::Result<()> {
    // Remove the timeline from the map.
    let mut timelines = tenant.timelines.lock().unwrap();
    let children_exist = timelines
        .iter()
        .any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id));
    // XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`.
    // We already deleted the layer files, so it's probably best to panic.
    // (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart)
    if children_exist {
        panic!("Timeline grew children while we removed layer files");
    }

    timelines
        .remove(&timeline.timeline_id)
        .expect("timeline that we were deleting was concurrently removed from 'timelines' map");

    drop(timelines);

    Ok(())
}
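In `remove_timeline_from_tenant` above, the `_: &DeletionGuard` parameter is never read: it only exists so the compiler rejects callers that do not already hold the deletion guard. A compilable sketch of that witness pattern, using made-up types (`DeleteFlow`, `Tenant` here are illustrative, not the real pageserver types):

```rust
use std::sync::Mutex;

/// Hypothetical state protected by a dedicated lock.
struct DeleteFlow;

/// Holding this proves the delete-flow lock is held by the current caller.
struct DeletionGuard<'a>(std::sync::MutexGuard<'a, DeleteFlow>);

struct Tenant {
    flow: Mutex<DeleteFlow>,
    timelines: Mutex<Vec<u64>>,
}

/// The `_guard` argument is a witness: the function never inspects it, but a
/// caller cannot reach this code without first acquiring the flow lock.
fn remove_timeline(tenant: &Tenant, id: u64, _guard: &DeletionGuard<'_>) {
    let mut timelines = tenant.timelines.lock().unwrap();
    timelines.retain(|t| *t != id);
}

fn main() {
    let tenant = Tenant {
        flow: Mutex::new(DeleteFlow),
        timelines: Mutex::new(vec![1, 2, 3]),
    };
    let guard = DeletionGuard(tenant.flow.lock().unwrap());
    remove_timeline(&tenant, 2, &guard);
    drop(guard);
    assert_eq!(*tenant.timelines.lock().unwrap(), vec![1, 3]);
}
```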
@@ -194,6 +194,8 @@ pub(crate) struct ChunkedVectoredReadBuilder {
|
||||
/// Start offset and metadata for each blob in this read
|
||||
blobs_at: VecMap<u64, BlobMeta>,
|
||||
max_read_size: Option<usize>,
|
||||
/// Chunk size reads are coalesced into.
|
||||
chunk_size: usize,
|
||||
}
|
||||
|
||||
/// Computes x / d rounded up.
|
||||
@@ -202,7 +204,6 @@ fn div_round_up(x: usize, d: usize) -> usize {
|
||||
}
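The chunk arithmetic in the hunks below turns byte offsets into chunk numbers: the start offset is divided down, the end offset is rounded up. A small standalone sketch of that arithmetic; the body of `div_round_up` is an assumption reconstructed from its name and doc comment, since only the signature appears in the diff:

```rust
/// Computes x / d rounded up (assumed implementation).
fn div_round_up(x: usize, d: usize) -> usize {
    (x + d - 1) / d
}

/// First chunk touched by a byte range, and one past the last chunk touched.
fn chunk_range(start_offset: u64, end_offset: u64, chunk_size: usize) -> (usize, usize) {
    let start_blk_no = start_offset as usize / chunk_size;
    let end_blk_no = div_round_up(end_offset as usize, chunk_size);
    (start_blk_no, end_blk_no)
}

fn main() {
    // A read of bytes [100, 1500) with 512-byte chunks touches chunks 0..3.
    assert_eq!(chunk_range(100, 1500, 512), (0, 3));
}
```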
|
||||
|
||||
impl ChunkedVectoredReadBuilder {
|
||||
const CHUNK_SIZE: usize = virtual_file::get_io_buffer_alignment();
|
||||
/// Start building a new vectored read.
|
||||
///
|
||||
/// Note that by design, this does not check against reading more than `max_read_size` to
|
||||
@@ -213,19 +214,21 @@ impl ChunkedVectoredReadBuilder {
|
||||
end_offset: u64,
|
||||
meta: BlobMeta,
|
||||
max_read_size: Option<usize>,
|
||||
chunk_size: usize,
|
||||
) -> Self {
|
||||
let mut blobs_at = VecMap::default();
|
||||
blobs_at
|
||||
.append(start_offset, meta)
|
||||
.expect("First insertion always succeeds");
|
||||
|
||||
let start_blk_no = start_offset as usize / Self::CHUNK_SIZE;
|
||||
let end_blk_no = div_round_up(end_offset as usize, Self::CHUNK_SIZE);
|
||||
let start_blk_no = start_offset as usize / chunk_size;
|
||||
let end_blk_no = div_round_up(end_offset as usize, chunk_size);
|
||||
Self {
|
||||
start_blk_no,
|
||||
end_blk_no,
|
||||
blobs_at,
|
||||
max_read_size,
|
||||
chunk_size,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -234,12 +237,18 @@ impl ChunkedVectoredReadBuilder {
|
||||
end_offset: u64,
|
||||
meta: BlobMeta,
|
||||
max_read_size: usize,
|
||||
align: usize,
|
||||
) -> Self {
|
||||
Self::new_impl(start_offset, end_offset, meta, Some(max_read_size))
|
||||
Self::new_impl(start_offset, end_offset, meta, Some(max_read_size), align)
|
||||
}
|
||||
|
||||
pub(crate) fn new_streaming(start_offset: u64, end_offset: u64, meta: BlobMeta) -> Self {
|
||||
Self::new_impl(start_offset, end_offset, meta, None)
|
||||
pub(crate) fn new_streaming(
|
||||
start_offset: u64,
|
||||
end_offset: u64,
|
||||
meta: BlobMeta,
|
||||
align: usize,
|
||||
) -> Self {
|
||||
Self::new_impl(start_offset, end_offset, meta, None, align)
|
||||
}
|
||||
|
||||
/// Attempts to extend the current read with a new blob if the new blob resides in the same or the immediate next chunk.
|
||||
@@ -247,12 +256,12 @@ impl ChunkedVectoredReadBuilder {
|
||||
/// The resulting size also must be below the max read size.
|
||||
pub(crate) fn extend(&mut self, start: u64, end: u64, meta: BlobMeta) -> VectoredReadExtended {
|
||||
tracing::trace!(start, end, "trying to extend");
|
||||
let start_blk_no = start as usize / Self::CHUNK_SIZE;
|
||||
let end_blk_no = div_round_up(end as usize, Self::CHUNK_SIZE);
|
||||
let start_blk_no = start as usize / self.chunk_size;
|
||||
let end_blk_no = div_round_up(end as usize, self.chunk_size);
|
||||
|
||||
let not_limited_by_max_read_size = {
|
||||
if let Some(max_read_size) = self.max_read_size {
|
||||
let coalesced_size = (end_blk_no - self.start_blk_no) * Self::CHUNK_SIZE;
|
||||
let coalesced_size = (end_blk_no - self.start_blk_no) * self.chunk_size;
|
||||
coalesced_size <= max_read_size
|
||||
} else {
|
||||
true
|
||||
@@ -283,12 +292,12 @@ impl ChunkedVectoredReadBuilder {
|
||||
}
|
||||
|
||||
pub(crate) fn size(&self) -> usize {
|
||||
(self.end_blk_no - self.start_blk_no) * Self::CHUNK_SIZE
|
||||
(self.end_blk_no - self.start_blk_no) * self.chunk_size
|
||||
}
|
||||
|
||||
pub(crate) fn build(self) -> VectoredRead {
|
||||
let start = (self.start_blk_no * Self::CHUNK_SIZE) as u64;
|
||||
let end = (self.end_blk_no * Self::CHUNK_SIZE) as u64;
|
||||
let start = (self.start_blk_no * self.chunk_size) as u64;
|
||||
let end = (self.end_blk_no * self.chunk_size) as u64;
|
||||
VectoredRead {
|
||||
start,
|
||||
end,
|
||||
@@ -319,14 +328,18 @@ pub struct VectoredReadPlanner {
|
||||
prev: Option<(Key, Lsn, u64, BlobFlag)>,
|
||||
|
||||
max_read_size: usize,
|
||||
|
||||
align: usize,
|
||||
}
|
||||
|
||||
impl VectoredReadPlanner {
|
||||
pub fn new(max_read_size: usize) -> Self {
|
||||
let align = virtual_file::get_io_buffer_alignment();
|
||||
Self {
|
||||
blobs: BTreeMap::new(),
|
||||
prev: None,
|
||||
max_read_size,
|
||||
align,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -405,6 +418,7 @@ impl VectoredReadPlanner {
|
||||
end_offset,
|
||||
BlobMeta { key, lsn },
|
||||
self.max_read_size,
|
||||
self.align,
|
||||
);
|
||||
|
||||
let prev_read_builder = current_read_builder.replace(next_read_builder);
|
||||
@@ -458,13 +472,13 @@ impl<'a> VectoredBlobReader<'a> {
|
||||
);
|
||||
|
||||
if cfg!(debug_assertions) {
|
||||
const ALIGN: u64 = virtual_file::get_io_buffer_alignment() as u64;
|
||||
let align = virtual_file::get_io_buffer_alignment() as u64;
|
||||
debug_assert_eq!(
|
||||
read.start % ALIGN,
|
||||
read.start % align,
|
||||
0,
|
||||
"Read start at {} does not satisfy the required io buffer alignment ({} bytes)",
|
||||
read.start,
|
||||
ALIGN
|
||||
align
|
||||
);
|
||||
}
|
||||
|
||||
@@ -539,18 +553,22 @@ pub struct StreamingVectoredReadPlanner {
|
||||
max_cnt: usize,
|
||||
/// Size of the current batch
|
||||
cnt: usize,
|
||||
|
||||
align: usize,
|
||||
}
|
||||
|
||||
impl StreamingVectoredReadPlanner {
|
||||
pub fn new(max_read_size: u64, max_cnt: usize) -> Self {
|
||||
assert!(max_cnt > 0);
|
||||
assert!(max_read_size > 0);
|
||||
let align = virtual_file::get_io_buffer_alignment();
|
||||
Self {
|
||||
read_builder: None,
|
||||
prev: None,
|
||||
max_cnt,
|
||||
max_read_size,
|
||||
cnt: 0,
|
||||
align,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -603,6 +621,7 @@ impl StreamingVectoredReadPlanner {
|
||||
start_offset,
|
||||
end_offset,
|
||||
BlobMeta { key, lsn },
|
||||
self.align,
|
||||
))
|
||||
};
|
||||
}
|
||||
@@ -637,9 +656,9 @@ mod tests {
|
||||
use super::*;
|
||||
|
||||
fn validate_read(read: &VectoredRead, offset_range: &[(Key, Lsn, u64, BlobFlag)]) {
|
||||
const ALIGN: u64 = virtual_file::get_io_buffer_alignment() as u64;
|
||||
assert_eq!(read.start % ALIGN, 0);
|
||||
assert_eq!(read.start / ALIGN, offset_range.first().unwrap().2 / ALIGN);
|
||||
let align = virtual_file::get_io_buffer_alignment() as u64;
|
||||
assert_eq!(read.start % align, 0);
|
||||
assert_eq!(read.start / align, offset_range.first().unwrap().2 / align);
|
||||
|
||||
let expected_offsets_in_read: Vec<_> = offset_range.iter().map(|o| o.2).collect();
|
||||
|
||||
@@ -657,27 +676,32 @@ mod tests {
|
||||
fn planner_chunked_coalesce_all_test() {
|
||||
use crate::virtual_file;
|
||||
|
||||
const CHUNK_SIZE: u64 = virtual_file::get_io_buffer_alignment() as u64;
|
||||
let chunk_size = virtual_file::get_io_buffer_alignment() as u64;
|
||||
|
||||
let max_read_size = CHUNK_SIZE as usize * 8;
|
||||
// The test explicitly does not check chunk size < 512
|
||||
if chunk_size < 512 {
|
||||
return;
|
||||
}
|
||||
|
||||
let max_read_size = chunk_size as usize * 8;
|
||||
let key = Key::MIN;
|
||||
let lsn = Lsn(0);
|
||||
|
||||
let blob_descriptions = [
|
||||
(key, lsn, CHUNK_SIZE / 8, BlobFlag::None), // Read 1 BEGIN
|
||||
(key, lsn, CHUNK_SIZE / 4, BlobFlag::Ignore), // Gap
|
||||
(key, lsn, CHUNK_SIZE / 2, BlobFlag::None),
|
||||
(key, lsn, CHUNK_SIZE - 2, BlobFlag::Ignore), // Gap
|
||||
(key, lsn, CHUNK_SIZE, BlobFlag::None),
|
||||
(key, lsn, CHUNK_SIZE * 2 - 1, BlobFlag::None),
|
||||
(key, lsn, CHUNK_SIZE * 2 + 1, BlobFlag::Ignore), // Gap
|
||||
(key, lsn, CHUNK_SIZE * 3 + 1, BlobFlag::None),
|
||||
(key, lsn, CHUNK_SIZE * 5 + 1, BlobFlag::None),
|
||||
(key, lsn, CHUNK_SIZE * 6 + 1, BlobFlag::Ignore), // skipped chunk size, but not a chunk: should coalesce.
|
||||
(key, lsn, CHUNK_SIZE * 7 + 1, BlobFlag::None),
|
||||
(key, lsn, CHUNK_SIZE * 8, BlobFlag::None), // Read 2 BEGIN (b/c max_read_size)
|
||||
(key, lsn, CHUNK_SIZE * 9, BlobFlag::Ignore), // ==== skipped a chunk
|
||||
(key, lsn, CHUNK_SIZE * 10, BlobFlag::None), // Read 3 BEGIN (cannot coalesce)
|
||||
(key, lsn, chunk_size / 8, BlobFlag::None), // Read 1 BEGIN
|
||||
(key, lsn, chunk_size / 4, BlobFlag::Ignore), // Gap
|
||||
(key, lsn, chunk_size / 2, BlobFlag::None),
|
||||
(key, lsn, chunk_size - 2, BlobFlag::Ignore), // Gap
|
||||
(key, lsn, chunk_size, BlobFlag::None),
|
||||
(key, lsn, chunk_size * 2 - 1, BlobFlag::None),
|
||||
(key, lsn, chunk_size * 2 + 1, BlobFlag::Ignore), // Gap
|
||||
(key, lsn, chunk_size * 3 + 1, BlobFlag::None),
|
||||
(key, lsn, chunk_size * 5 + 1, BlobFlag::None),
|
||||
(key, lsn, chunk_size * 6 + 1, BlobFlag::Ignore), // skipped chunk size, but not a chunk: should coalesce.
|
||||
(key, lsn, chunk_size * 7 + 1, BlobFlag::None),
|
||||
(key, lsn, chunk_size * 8, BlobFlag::None), // Read 2 BEGIN (b/c max_read_size)
|
||||
(key, lsn, chunk_size * 9, BlobFlag::Ignore), // ==== skipped a chunk
|
||||
(key, lsn, chunk_size * 10, BlobFlag::None), // Read 3 BEGIN (cannot coalesce)
|
||||
];
|
||||
|
||||
let ranges = [
|
||||
@@ -756,19 +780,19 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn planner_replacement_test() {
|
||||
const CHUNK_SIZE: u64 = virtual_file::get_io_buffer_alignment() as u64;
|
||||
let max_read_size = 128 * CHUNK_SIZE as usize;
|
||||
let chunk_size = virtual_file::get_io_buffer_alignment() as u64;
|
||||
let max_read_size = 128 * chunk_size as usize;
|
||||
let first_key = Key::MIN;
|
||||
let second_key = first_key.next();
|
||||
let lsn = Lsn(0);
|
||||
|
||||
let blob_descriptions = vec![
|
||||
(first_key, lsn, 0, BlobFlag::None), // First in read 1
|
||||
(first_key, lsn, CHUNK_SIZE, BlobFlag::None), // Last in read 1
|
||||
(second_key, lsn, 2 * CHUNK_SIZE, BlobFlag::ReplaceAll),
|
||||
(second_key, lsn, 3 * CHUNK_SIZE, BlobFlag::None),
|
||||
(second_key, lsn, 4 * CHUNK_SIZE, BlobFlag::ReplaceAll), // First in read 2
|
||||
(second_key, lsn, 5 * CHUNK_SIZE, BlobFlag::None), // Last in read 2
|
||||
(first_key, lsn, chunk_size, BlobFlag::None), // Last in read 1
|
||||
(second_key, lsn, 2 * chunk_size, BlobFlag::ReplaceAll),
|
||||
(second_key, lsn, 3 * chunk_size, BlobFlag::None),
|
||||
(second_key, lsn, 4 * chunk_size, BlobFlag::ReplaceAll), // First in read 2
|
||||
(second_key, lsn, 5 * chunk_size, BlobFlag::None), // Last in read 2
|
||||
];
|
||||
|
||||
let ranges = [&blob_descriptions[0..2], &blob_descriptions[4..]];
|
||||
@@ -778,7 +802,7 @@ mod tests {
|
||||
planner.handle(key, lsn, offset, flag);
|
||||
}
|
||||
|
||||
planner.handle_range_end(6 * CHUNK_SIZE);
|
||||
planner.handle_range_end(6 * chunk_size);
|
||||
|
||||
let reads = planner.finish();
|
||||
assert_eq!(reads.len(), 2);
|
||||
@@ -923,6 +947,7 @@ mod tests {
|
||||
let reserved_bytes = blobs.iter().map(|bl| bl.len()).max().unwrap() * 2 + 16;
|
||||
let mut buf = BytesMut::with_capacity(reserved_bytes);
|
||||
|
||||
let align = virtual_file::get_io_buffer_alignment();
|
||||
let vectored_blob_reader = VectoredBlobReader::new(&file);
|
||||
let meta = BlobMeta {
|
||||
key: Key::MIN,
|
||||
@@ -934,7 +959,8 @@ mod tests {
|
||||
if idx + 1 == offsets.len() {
|
||||
continue;
|
||||
}
|
||||
let read_builder = ChunkedVectoredReadBuilder::new(*offset, *end, meta, 16 * 4096);
|
||||
let read_builder =
|
||||
ChunkedVectoredReadBuilder::new(*offset, *end, meta, 16 * 4096, align);
|
||||
let read = read_builder.build();
|
||||
let result = vectored_blob_reader.read_blobs(&read, buf, &ctx).await?;
|
||||
assert_eq!(result.blobs.len(), 1);
|
||||
|
||||
@@ -23,12 +23,10 @@ use pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use std::fs::File;
|
||||
use std::io::{Error, ErrorKind, Seek, SeekFrom};
|
||||
#[cfg(target_os = "linux")]
|
||||
use std::os::unix::fs::OpenOptionsExt;
|
||||
use tokio_epoll_uring::{BoundedBuf, IoBuf, IoBufMut, Slice};
|
||||
|
||||
use std::os::fd::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd, RawFd};
|
||||
use std::sync::atomic::{AtomicBool, AtomicU8, AtomicUsize, Ordering};
|
||||
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||
use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
|
||||
use tokio::time::Instant;
|
||||
|
||||
@@ -40,7 +38,7 @@ pub use io_engine::FeatureTestResult as IoEngineFeatureTestResult;
|
||||
mod metadata;
|
||||
mod open_options;
|
||||
use self::owned_buffers_io::write::OwnedAsyncWriter;
|
||||
pub(crate) use api::IoMode;
|
||||
pub(crate) use api::DirectIoMode;
|
||||
pub(crate) use io_engine::IoEngineKind;
|
||||
pub(crate) use metadata::Metadata;
|
||||
pub(crate) use open_options::*;
|
||||
@@ -63,171 +61,6 @@ pub(crate) mod owned_buffers_io {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct VirtualFile {
|
||||
inner: VirtualFileInner,
|
||||
_mode: IoMode,
|
||||
}
|
||||
|
||||
impl VirtualFile {
|
||||
/// Open a file in read-only mode. Like File::open.
|
||||
pub async fn open<P: AsRef<Utf8Path>>(
|
||||
path: P,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Self, std::io::Error> {
|
||||
let inner = VirtualFileInner::open(path, ctx).await?;
|
||||
Ok(VirtualFile {
|
||||
inner,
|
||||
_mode: IoMode::Buffered,
|
||||
})
|
||||
}
|
||||
|
||||
/// Open a file in read-only mode. Like File::open.
|
||||
///
|
||||
/// `O_DIRECT` will be enabled base on `virtual_file_io_mode`.
|
||||
pub async fn open_v2<P: AsRef<Utf8Path>>(
|
||||
path: P,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Self, std::io::Error> {
|
||||
Self::open_with_options_v2(path.as_ref(), OpenOptions::new().read(true), ctx).await
|
||||
}
|
||||
|
||||
pub async fn create<P: AsRef<Utf8Path>>(
|
||||
path: P,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Self, std::io::Error> {
|
||||
let inner = VirtualFileInner::create(path, ctx).await?;
|
||||
Ok(VirtualFile {
|
||||
inner,
|
||||
_mode: IoMode::Buffered,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn create_v2<P: AsRef<Utf8Path>>(
|
||||
path: P,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Self, std::io::Error> {
|
||||
VirtualFile::open_with_options_v2(
|
||||
path.as_ref(),
|
||||
OpenOptions::new().write(true).create(true).truncate(true),
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn open_with_options<P: AsRef<Utf8Path>>(
|
||||
path: P,
|
||||
open_options: &OpenOptions,
|
||||
ctx: &RequestContext, /* TODO: carry a pointer to the metrics in the RequestContext instead of the parsing https://github.com/neondatabase/neon/issues/6107 */
|
||||
) -> Result<Self, std::io::Error> {
|
||||
let inner = VirtualFileInner::open_with_options(path, open_options, ctx).await?;
|
||||
Ok(VirtualFile {
|
||||
inner,
|
||||
_mode: IoMode::Buffered,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn open_with_options_v2<P: AsRef<Utf8Path>>(
|
||||
path: P,
|
||||
open_options: &OpenOptions,
|
||||
ctx: &RequestContext, /* TODO: carry a pointer to the metrics in the RequestContext instead of the parsing https://github.com/neondatabase/neon/issues/6107 */
|
||||
) -> Result<Self, std::io::Error> {
|
||||
let file = match get_io_mode() {
|
||||
IoMode::Buffered => {
|
||||
let inner = VirtualFileInner::open_with_options(path, open_options, ctx).await?;
|
||||
VirtualFile {
|
||||
inner,
|
||||
_mode: IoMode::Buffered,
|
||||
}
|
||||
}
|
||||
#[cfg(target_os = "linux")]
|
||||
IoMode::Direct => {
|
||||
let inner = VirtualFileInner::open_with_options(
|
||||
path,
|
||||
open_options.clone().custom_flags(nix::libc::O_DIRECT),
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
VirtualFile {
|
||||
inner,
|
||||
_mode: IoMode::Direct,
|
||||
}
|
||||
}
|
||||
};
|
||||
Ok(file)
|
||||
}
|
||||
|
||||
pub fn path(&self) -> &Utf8Path {
|
||||
self.inner.path.as_path()
|
||||
}
|
||||
|
||||
pub async fn crashsafe_overwrite<B: BoundedBuf<Buf = Buf> + Send, Buf: IoBuf + Send>(
|
||||
final_path: Utf8PathBuf,
|
||||
tmp_path: Utf8PathBuf,
|
||||
content: B,
|
||||
) -> std::io::Result<()> {
|
||||
VirtualFileInner::crashsafe_overwrite(final_path, tmp_path, content).await
|
||||
}
|
||||
|
||||
pub async fn sync_all(&self) -> Result<(), Error> {
|
||||
self.inner.sync_all().await
|
||||
}
|
||||
|
||||
pub async fn sync_data(&self) -> Result<(), Error> {
|
||||
self.inner.sync_data().await
|
||||
}
|
||||
|
||||
pub async fn metadata(&self) -> Result<Metadata, Error> {
|
||||
self.inner.metadata().await
|
||||
}
|
||||
|
||||
pub fn remove(self) {
|
||||
self.inner.remove();
|
||||
}
|
||||
|
||||
pub async fn seek(&mut self, pos: SeekFrom) -> Result<u64, Error> {
|
||||
self.inner.seek(pos).await
|
||||
}
|
||||
|
||||
pub async fn read_exact_at<Buf>(
|
||||
&self,
|
||||
slice: Slice<Buf>,
|
||||
offset: u64,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Slice<Buf>, Error>
|
||||
where
|
||||
Buf: IoBufMut + Send,
|
||||
{
|
||||
self.inner.read_exact_at(slice, offset, ctx).await
|
||||
}
|
||||
|
||||
pub async fn read_exact_at_page(
|
||||
&self,
|
||||
page: PageWriteGuard<'static>,
|
||||
offset: u64,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<PageWriteGuard<'static>, Error> {
|
||||
self.inner.read_exact_at_page(page, offset, ctx).await
|
||||
}
|
||||
|
||||
pub async fn write_all_at<Buf: IoBuf + Send>(
|
||||
&self,
|
||||
buf: FullSlice<Buf>,
|
||||
offset: u64,
|
||||
ctx: &RequestContext,
|
||||
) -> (FullSlice<Buf>, Result<(), Error>) {
|
||||
self.inner.write_all_at(buf, offset, ctx).await
|
||||
}
|
||||
|
||||
pub async fn write_all<Buf: IoBuf + Send>(
|
||||
&mut self,
|
||||
buf: FullSlice<Buf>,
|
||||
ctx: &RequestContext,
|
||||
) -> (FullSlice<Buf>, Result<usize, Error>) {
|
||||
self.inner.write_all(buf, ctx).await
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// A virtual file descriptor. You can use this just like std::fs::File, but internally
|
||||
/// the underlying file is closed if the system is low on file descriptors,
|
||||
@@ -244,7 +77,7 @@ impl VirtualFile {
|
||||
/// 'tag' field is used to detect whether the handle still is valid or not.
|
||||
///
|
||||
#[derive(Debug)]
|
||||
pub struct VirtualFileInner {
|
||||
pub struct VirtualFile {
|
||||
/// Lazy handle to the global file descriptor cache. The slot that this points to
|
||||
/// might contain our File, or it may be empty, or it may contain a File that
|
||||
/// belongs to a different VirtualFile.
|
||||
@@ -517,12 +350,12 @@ macro_rules! with_file {
|
||||
}};
|
||||
}
|
||||
|
||||
impl VirtualFileInner {
|
||||
impl VirtualFile {
|
||||
/// Open a file in read-only mode. Like File::open.
|
||||
pub async fn open<P: AsRef<Utf8Path>>(
|
||||
path: P,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<VirtualFileInner, std::io::Error> {
|
||||
) -> Result<VirtualFile, std::io::Error> {
|
||||
Self::open_with_options(path.as_ref(), OpenOptions::new().read(true), ctx).await
|
||||
}
|
||||
|
||||
@@ -531,7 +364,7 @@ impl VirtualFileInner {
|
||||
pub async fn create<P: AsRef<Utf8Path>>(
|
||||
path: P,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<VirtualFileInner, std::io::Error> {
|
||||
) -> Result<VirtualFile, std::io::Error> {
|
||||
Self::open_with_options(
|
||||
path.as_ref(),
|
||||
OpenOptions::new().write(true).create(true).truncate(true),
|
||||
@@ -549,7 +382,7 @@ impl VirtualFileInner {
|
||||
path: P,
|
||||
open_options: &OpenOptions,
|
||||
_ctx: &RequestContext, /* TODO: carry a pointer to the metrics in the RequestContext instead of the parsing https://github.com/neondatabase/neon/issues/6107 */
|
||||
) -> Result<VirtualFileInner, std::io::Error> {
|
||||
) -> Result<VirtualFile, std::io::Error> {
|
||||
let path_ref = path.as_ref();
|
||||
let path_str = path_ref.to_string();
|
||||
let parts = path_str.split('/').collect::<Vec<&str>>();
|
||||
@@ -590,7 +423,7 @@ impl VirtualFileInner {
|
||||
reopen_options.create_new(false);
|
||||
reopen_options.truncate(false);
|
||||
|
||||
let vfile = VirtualFileInner {
|
||||
let vfile = VirtualFile {
|
||||
handle: RwLock::new(handle),
|
||||
pos: 0,
|
||||
path: path_ref.to_path_buf(),
|
||||
@@ -1201,21 +1034,6 @@ impl tokio_epoll_uring::IoFd for FileGuard {
|
||||
|
||||
#[cfg(test)]
|
||||
impl VirtualFile {
|
||||
pub(crate) async fn read_blk(
|
||||
&self,
|
||||
blknum: u32,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<crate::tenant::block_io::BlockLease<'_>, std::io::Error> {
|
||||
self.inner.read_blk(blknum, ctx).await
|
||||
}
|
||||
|
||||
async fn read_to_end(&mut self, buf: &mut Vec<u8>, ctx: &RequestContext) -> Result<(), Error> {
|
||||
self.inner.read_to_end(buf, ctx).await
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
impl VirtualFileInner {
|
||||
pub(crate) async fn read_blk(
|
||||
&self,
|
||||
blknum: u32,
|
||||
@@ -1249,7 +1067,7 @@ impl VirtualFileInner {
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for VirtualFileInner {
|
||||
impl Drop for VirtualFile {
|
||||
/// If a VirtualFile is dropped, close the underlying file if it was open.
|
||||
fn drop(&mut self) {
|
||||
let handle = self.handle.get_mut();
|
||||
@@ -1325,10 +1143,15 @@ impl OpenFiles {
|
||||
/// server startup.
|
||||
///
|
||||
#[cfg(not(test))]
|
||||
pub fn init(num_slots: usize, engine: IoEngineKind) {
|
||||
pub fn init(num_slots: usize, engine: IoEngineKind, io_buffer_alignment: usize) {
|
||||
if OPEN_FILES.set(OpenFiles::new(num_slots)).is_err() {
|
||||
panic!("virtual_file::init called twice");
|
||||
}
|
||||
if set_io_buffer_alignment(io_buffer_alignment).is_err() {
|
||||
panic!(
|
||||
"IO buffer alignment needs to be a power of two and greater than 512, got {io_buffer_alignment}"
|
||||
);
|
||||
}
|
||||
io_engine::init(engine);
|
||||
crate::metrics::virtual_file_descriptor_cache::SIZE_MAX.set(num_slots as u64);
|
||||
}
|
||||
@@ -1352,20 +1175,47 @@ fn get_open_files() -> &'static OpenFiles {
|
||||
}
|
||||
}
|
||||
|
||||
static IO_BUFFER_ALIGNMENT: AtomicUsize = AtomicUsize::new(DEFAULT_IO_BUFFER_ALIGNMENT);

/// Returns true if the alignment is a power of two and is greater or equal to 512.
fn is_valid_io_buffer_alignment(align: usize) -> bool {
    align.is_power_of_two() && align >= 512
}

/// Sets IO buffer alignment requirement. Returns error if the alignment requirement is
/// not a power of two or less than 512 bytes.
#[allow(unused)]
pub(crate) fn set_io_buffer_alignment(align: usize) -> Result<(), usize> {
    if is_valid_io_buffer_alignment(align) {
        IO_BUFFER_ALIGNMENT.store(align, std::sync::atomic::Ordering::Relaxed);
        Ok(())
    } else {
        Err(align)
    }
}

/// Gets the io buffer alignment.
pub(crate) const fn get_io_buffer_alignment() -> usize {
    DEFAULT_IO_BUFFER_ALIGNMENT
///
/// This function should be used for getting the actual alignment value to use.
pub(crate) fn get_io_buffer_alignment() -> usize {
    let align = IO_BUFFER_ALIGNMENT.load(std::sync::atomic::Ordering::Relaxed);

    if cfg!(test) {
        let env_var_name = "NEON_PAGESERVER_UNIT_TEST_IO_BUFFER_ALIGNMENT";
        if let Some(test_align) = utils::env::var(env_var_name) {
            if is_valid_io_buffer_alignment(test_align) {
                test_align
            } else {
                panic!("IO buffer alignment needs to be a power of two and greater than 512, got {test_align}");
            }
        } else {
            align
        }
    } else {
        align
    }
}
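Because the configured alignment is guaranteed to be a power of two, callers can round lengths and offsets with a mask instead of a division. A short illustrative sketch; `round_up_to_alignment` and the hard-coded 512 are stand-ins, not part of the diff:

```rust
/// Round `len` up to the next multiple of `align`.
/// Valid only because `align` is a power of two.
fn round_up_to_alignment(len: usize, align: usize) -> usize {
    debug_assert!(align.is_power_of_two());
    (len + align - 1) & !(align - 1)
}

fn main() {
    let align = 512; // stand-in for the configured io buffer alignment
    assert_eq!(round_up_to_alignment(1, align), 512);
    assert_eq!(round_up_to_alignment(512, align), 512);
    assert_eq!(round_up_to_alignment(513, align), 1024);
}
```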
|
||||
|
||||
static IO_MODE: AtomicU8 = AtomicU8::new(IoMode::preferred() as u8);

pub(crate) fn set_io_mode(mode: IoMode) {
    IO_MODE.store(mode as u8, std::sync::atomic::Ordering::Relaxed);
}

pub(crate) fn get_io_mode() -> IoMode {
    IoMode::try_from(IO_MODE.load(Ordering::Relaxed)).unwrap()
}
#[cfg(test)]
mod tests {
|
||||
use crate::context::DownloadBehavior;
|
||||
@@ -1674,7 +1524,7 @@ mod tests {
|
||||
// Open the file many times.
|
||||
let mut files = Vec::new();
|
||||
for _ in 0..VIRTUAL_FILES {
|
||||
let f = VirtualFileInner::open_with_options(
|
||||
let f = VirtualFile::open_with_options(
|
||||
&test_file_path,
|
||||
OpenOptions::new().read(true),
|
||||
&ctx,
|
||||
@@ -1726,7 +1576,7 @@ mod tests {
|
||||
let path = testdir.join("myfile");
|
||||
let tmp_path = testdir.join("myfile.tmp");
|
||||
|
||||
VirtualFileInner::crashsafe_overwrite(path.clone(), tmp_path.clone(), b"foo".to_vec())
|
||||
VirtualFile::crashsafe_overwrite(path.clone(), tmp_path.clone(), b"foo".to_vec())
|
||||
.await
|
||||
.unwrap();
|
||||
let mut file = MaybeVirtualFile::from(VirtualFile::open(&path, &ctx).await.unwrap());
|
||||
@@ -1735,7 +1585,7 @@ mod tests {
|
||||
assert!(!tmp_path.exists());
|
||||
drop(file);
|
||||
|
||||
VirtualFileInner::crashsafe_overwrite(path.clone(), tmp_path.clone(), b"bar".to_vec())
|
||||
VirtualFile::crashsafe_overwrite(path.clone(), tmp_path.clone(), b"bar".to_vec())
|
||||
.await
|
||||
.unwrap();
|
||||
let mut file = MaybeVirtualFile::from(VirtualFile::open(&path, &ctx).await.unwrap());
|
||||
@@ -1758,7 +1608,7 @@ mod tests {
|
||||
std::fs::write(&tmp_path, "some preexisting junk that should be removed").unwrap();
|
||||
assert!(tmp_path.exists());
|
||||
|
||||
VirtualFileInner::crashsafe_overwrite(path.clone(), tmp_path.clone(), b"foo".to_vec())
|
||||
VirtualFile::crashsafe_overwrite(path.clone(), tmp_path.clone(), b"foo".to_vec())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -146,8 +146,6 @@ ConstructDeltaMessage()
|
||||
if (RootTable.role_table)
|
||||
{
|
||||
JsonbValue roles;
|
||||
HASH_SEQ_STATUS status;
|
||||
RoleEntry *entry;
|
||||
|
||||
roles.type = jbvString;
|
||||
roles.val.string.val = "roles";
|
||||
@@ -155,6 +153,9 @@ ConstructDeltaMessage()
|
||||
pushJsonbValue(&state, WJB_KEY, &roles);
|
||||
pushJsonbValue(&state, WJB_BEGIN_ARRAY, NULL);
|
||||
|
||||
HASH_SEQ_STATUS status;
|
||||
RoleEntry *entry;
|
||||
|
||||
hash_seq_init(&status, RootTable.role_table);
|
||||
while ((entry = hash_seq_search(&status)) != NULL)
|
||||
{
|
||||
@@ -189,12 +190,10 @@ ConstructDeltaMessage()
|
||||
}
|
||||
pushJsonbValue(&state, WJB_END_ARRAY, NULL);
|
||||
}
|
||||
{
|
||||
JsonbValue *result = pushJsonbValue(&state, WJB_END_OBJECT, NULL);
|
||||
Jsonb *jsonb = JsonbValueToJsonb(result);
|
||||
JsonbValue *result = pushJsonbValue(&state, WJB_END_OBJECT, NULL);
|
||||
Jsonb *jsonb = JsonbValueToJsonb(result);
|
||||
|
||||
return JsonbToCString(NULL, &jsonb->root, 0 /* estimated_len */ );
|
||||
}
|
||||
return JsonbToCString(NULL, &jsonb->root, 0 /* estimated_len */ );
|
||||
}
|
||||
|
||||
#define ERROR_SIZE 1024
|
||||
@@ -273,28 +272,32 @@ SendDeltasToControlPlane()
|
||||
curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, ErrorWriteCallback);
|
||||
}
|
||||
|
||||
char *message = ConstructDeltaMessage();
|
||||
ErrorString str;
|
||||
|
||||
str.size = 0;
|
||||
|
||||
curl_easy_setopt(handle, CURLOPT_POSTFIELDS, message);
|
||||
curl_easy_setopt(handle, CURLOPT_WRITEDATA, &str);
|
||||
|
||||
const int num_retries = 5;
|
||||
CURLcode curl_status;
|
||||
|
||||
for (int i = 0; i < num_retries; i++)
|
||||
{
|
||||
if ((curl_status = curl_easy_perform(handle)) == 0)
|
||||
break;
|
||||
elog(LOG, "Curl request failed on attempt %d: %s", i, CurlErrorBuf);
|
||||
pg_usleep(1000 * 1000);
|
||||
}
|
||||
if (curl_status != CURLE_OK)
|
||||
{
|
||||
elog(ERROR, "Failed to perform curl request: %s", CurlErrorBuf);
|
||||
}
|
||||
else
|
||||
{
|
||||
char *message = ConstructDeltaMessage();
|
||||
ErrorString str;
|
||||
const int num_retries = 5;
|
||||
CURLcode curl_status;
|
||||
long response_code;
|
||||
|
||||
str.size = 0;
|
||||
|
||||
curl_easy_setopt(handle, CURLOPT_POSTFIELDS, message);
|
||||
curl_easy_setopt(handle, CURLOPT_WRITEDATA, &str);
|
||||
|
||||
for (int i = 0; i < num_retries; i++)
|
||||
{
|
||||
if ((curl_status = curl_easy_perform(handle)) == 0)
|
||||
break;
|
||||
elog(LOG, "Curl request failed on attempt %d: %s", i, CurlErrorBuf);
|
||||
pg_usleep(1000 * 1000);
|
||||
}
|
||||
if (curl_status != CURLE_OK)
|
||||
elog(ERROR, "Failed to perform curl request: %s", CurlErrorBuf);
|
||||
|
||||
if (curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &response_code) != CURLE_UNKNOWN_OPTION)
|
||||
{
|
||||
if (response_code != 200)
|
||||
@@ -373,11 +376,10 @@ MergeTable()
|
||||
|
||||
if (old_table->db_table)
|
||||
{
|
||||
InitDbTableIfNeeded();
|
||||
DbEntry *entry;
|
||||
HASH_SEQ_STATUS status;
|
||||
|
||||
InitDbTableIfNeeded();
|
||||
|
||||
hash_seq_init(&status, old_table->db_table);
|
||||
while ((entry = hash_seq_search(&status)) != NULL)
|
||||
{
|
||||
@@ -419,11 +421,10 @@ MergeTable()
|
||||
|
||||
if (old_table->role_table)
|
||||
{
|
||||
InitRoleTableIfNeeded();
|
||||
RoleEntry *entry;
|
||||
HASH_SEQ_STATUS status;
|
||||
|
||||
InitRoleTableIfNeeded();
|
||||
|
||||
hash_seq_init(&status, old_table->role_table);
|
||||
while ((entry = hash_seq_search(&status)) != NULL)
|
||||
{
|
||||
@@ -514,12 +515,9 @@ RoleIsNeonSuperuser(const char *role_name)
|
||||
static void
|
||||
HandleCreateDb(CreatedbStmt *stmt)
|
||||
{
|
||||
InitDbTableIfNeeded();
|
||||
DefElem *downer = NULL;
|
||||
ListCell *option;
|
||||
bool found = false;
|
||||
DbEntry *entry;
|
||||
|
||||
InitDbTableIfNeeded();
|
||||
|
||||
foreach(option, stmt->options)
|
||||
{
|
||||
@@ -528,11 +526,13 @@ HandleCreateDb(CreatedbStmt *stmt)
|
||||
if (strcmp(defel->defname, "owner") == 0)
|
||||
downer = defel;
|
||||
}
|
||||
bool found = false;
|
||||
DbEntry *entry = hash_search(
|
||||
CurrentDdlTable->db_table,
|
||||
stmt->dbname,
|
||||
HASH_ENTER,
|
||||
&found);
|
||||
|
||||
entry = hash_search(CurrentDdlTable->db_table,
|
||||
stmt->dbname,
|
||||
HASH_ENTER,
|
||||
&found);
|
||||
if (!found)
|
||||
memset(entry->old_name, 0, sizeof(entry->old_name));
|
||||
|
||||
@@ -554,24 +554,21 @@ HandleCreateDb(CreatedbStmt *stmt)
|
||||
static void
|
||||
HandleAlterOwner(AlterOwnerStmt *stmt)
|
||||
{
|
||||
const char *name;
|
||||
bool found = false;
|
||||
DbEntry *entry;
|
||||
const char *new_owner;
|
||||
|
||||
if (stmt->objectType != OBJECT_DATABASE)
|
||||
return;
|
||||
InitDbTableIfNeeded();
|
||||
const char *name = strVal(stmt->object);
|
||||
bool found = false;
|
||||
DbEntry *entry = hash_search(
|
||||
CurrentDdlTable->db_table,
|
||||
name,
|
||||
HASH_ENTER,
|
||||
&found);
|
||||
|
||||
name = strVal(stmt->object);
|
||||
entry = hash_search(CurrentDdlTable->db_table,
|
||||
name,
|
||||
HASH_ENTER,
|
||||
&found);
|
||||
if (!found)
|
||||
memset(entry->old_name, 0, sizeof(entry->old_name));
|
||||
const char *new_owner = get_rolespec_name(stmt->newowner);
|
||||
|
||||
new_owner = get_rolespec_name(stmt->newowner);
|
||||
if (RoleIsNeonSuperuser(new_owner))
|
||||
elog(ERROR, "can't alter owner to neon_superuser");
|
||||
entry->owner = get_role_oid(new_owner, false);
|
||||
@@ -581,23 +578,21 @@ HandleAlterOwner(AlterOwnerStmt *stmt)
|
||||
static void
|
||||
HandleDbRename(RenameStmt *stmt)
|
||||
{
|
||||
bool found = false;
|
||||
DbEntry *entry;
|
||||
DbEntry *entry_for_new_name;
|
||||
|
||||
Assert(stmt->renameType == OBJECT_DATABASE);
|
||||
InitDbTableIfNeeded();
|
||||
entry = hash_search(CurrentDdlTable->db_table,
|
||||
stmt->subname,
|
||||
HASH_FIND,
|
||||
&found);
|
||||
bool found = false;
|
||||
DbEntry *entry = hash_search(
|
||||
CurrentDdlTable->db_table,
|
||||
stmt->subname,
|
||||
HASH_FIND,
|
||||
&found);
|
||||
DbEntry *entry_for_new_name = hash_search(
|
||||
CurrentDdlTable->db_table,
|
||||
stmt->newname,
|
||||
HASH_ENTER,
|
||||
NULL);
|
||||
|
||||
entry_for_new_name = hash_search(CurrentDdlTable->db_table,
|
||||
stmt->newname,
|
||||
HASH_ENTER,
|
||||
NULL);
|
||||
entry_for_new_name->type = Op_Set;
|
||||
|
||||
if (found)
|
||||
{
|
||||
if (entry->old_name[0] != '\0')
|
||||
@@ -605,7 +600,8 @@ HandleDbRename(RenameStmt *stmt)
|
||||
else
|
||||
strlcpy(entry_for_new_name->old_name, entry->name, NAMEDATALEN);
|
||||
entry_for_new_name->owner = entry->owner;
|
||||
hash_search(CurrentDdlTable->db_table,
|
||||
hash_search(
|
||||
CurrentDdlTable->db_table,
|
||||
stmt->subname,
|
||||
HASH_REMOVE,
|
||||
NULL);
|
||||
@@ -620,15 +616,14 @@ HandleDbRename(RenameStmt *stmt)
|
||||
static void
|
||||
HandleDropDb(DropdbStmt *stmt)
|
||||
{
|
||||
bool found = false;
|
||||
DbEntry *entry;
|
||||
|
||||
InitDbTableIfNeeded();
|
||||
bool found = false;
|
||||
DbEntry *entry = hash_search(
|
||||
CurrentDdlTable->db_table,
|
||||
stmt->dbname,
|
||||
HASH_ENTER,
|
||||
&found);
|
||||
|
||||
entry = hash_search(CurrentDdlTable->db_table,
|
||||
stmt->dbname,
|
||||
HASH_ENTER,
|
||||
&found);
|
||||
entry->type = Op_Delete;
|
||||
entry->owner = InvalidOid;
|
||||
if (!found)
|
||||
@@ -638,14 +633,16 @@ HandleDropDb(DropdbStmt *stmt)
|
||||
static void
|
||||
HandleCreateRole(CreateRoleStmt *stmt)
|
||||
{
|
||||
InitRoleTableIfNeeded();
|
||||
bool found = false;
|
||||
RoleEntry *entry;
|
||||
DefElem *dpass;
|
||||
RoleEntry *entry = hash_search(
|
||||
CurrentDdlTable->role_table,
|
||||
stmt->role,
|
||||
HASH_ENTER,
|
||||
&found);
|
||||
DefElem *dpass = NULL;
|
||||
ListCell *option;
|
||||
|
||||
InitRoleTableIfNeeded();
|
||||
|
||||
dpass = NULL;
|
||||
foreach(option, stmt->options)
|
||||
{
|
||||
DefElem *defel = lfirst(option);
|
||||
@@ -653,11 +650,6 @@ HandleCreateRole(CreateRoleStmt *stmt)
|
||||
if (strcmp(defel->defname, "password") == 0)
|
||||
dpass = defel;
|
||||
}
|
||||
|
||||
entry = hash_search(CurrentDdlTable->role_table,
|
||||
stmt->role,
|
||||
HASH_ENTER,
|
||||
&found);
|
||||
if (!found)
|
||||
memset(entry->old_name, 0, sizeof(entry->old_name));
|
||||
if (dpass && dpass->arg)
|
||||
@@ -670,18 +662,14 @@ HandleCreateRole(CreateRoleStmt *stmt)
|
||||
static void
|
||||
HandleAlterRole(AlterRoleStmt *stmt)
|
||||
{
|
||||
const char *role_name = stmt->role->rolename;
|
||||
DefElem *dpass;
|
||||
ListCell *option;
|
||||
bool found = false;
|
||||
RoleEntry *entry;
|
||||
|
||||
InitRoleTableIfNeeded();
|
||||
DefElem *dpass = NULL;
|
||||
ListCell *option;
|
||||
const char *role_name = stmt->role->rolename;
|
||||
|
||||
if (RoleIsNeonSuperuser(role_name) && !superuser())
|
||||
elog(ERROR, "can't ALTER neon_superuser");
|
||||
|
||||
dpass = NULL;
|
||||
foreach(option, stmt->options)
|
||||
{
|
||||
DefElem *defel = lfirst(option);
|
||||
@@ -692,11 +680,13 @@ HandleAlterRole(AlterRoleStmt *stmt)
|
||||
/* We only care about updates to the password */
|
||||
if (!dpass)
|
||||
return;
|
||||
bool found = false;
|
||||
RoleEntry *entry = hash_search(
|
||||
CurrentDdlTable->role_table,
|
||||
role_name,
|
||||
HASH_ENTER,
|
||||
&found);
|
||||
|
||||
entry = hash_search(CurrentDdlTable->role_table,
|
||||
role_name,
|
||||
HASH_ENTER,
|
||||
&found);
|
||||
if (!found)
|
||||
memset(entry->old_name, 0, sizeof(entry->old_name));
|
||||
if (dpass->arg)
|
||||
@@ -709,22 +699,20 @@ HandleAlterRole(AlterRoleStmt *stmt)
|
||||
static void
|
||||
HandleRoleRename(RenameStmt *stmt)
|
||||
{
|
||||
bool found = false;
|
||||
RoleEntry *entry;
|
||||
RoleEntry *entry_for_new_name;
|
||||
|
||||
Assert(stmt->renameType == OBJECT_ROLE);
|
||||
InitRoleTableIfNeeded();
|
||||
Assert(stmt->renameType == OBJECT_ROLE);
|
||||
bool found = false;
|
||||
RoleEntry *entry = hash_search(
|
||||
CurrentDdlTable->role_table,
|
||||
stmt->subname,
|
||||
HASH_FIND,
|
||||
&found);
|
||||
|
||||
entry = hash_search(CurrentDdlTable->role_table,
|
||||
stmt->subname,
|
||||
HASH_FIND,
|
||||
&found);
|
||||
|
||||
entry_for_new_name = hash_search(CurrentDdlTable->role_table,
|
||||
stmt->newname,
|
||||
HASH_ENTER,
|
||||
NULL);
|
||||
RoleEntry *entry_for_new_name = hash_search(
|
||||
CurrentDdlTable->role_table,
|
||||
stmt->newname,
|
||||
HASH_ENTER,
|
||||
NULL);
|
||||
|
||||
entry_for_new_name->type = Op_Set;
|
||||
if (found)
|
||||
@@ -750,9 +738,8 @@ HandleRoleRename(RenameStmt *stmt)
|
||||
static void
|
||||
HandleDropRole(DropRoleStmt *stmt)
|
||||
{
|
||||
ListCell *item;
|
||||
|
||||
InitRoleTableIfNeeded();
|
||||
ListCell *item;
|
||||
|
||||
foreach(item, stmt->roles)
|
||||
{
|
||||
|
||||
@@ -170,14 +170,12 @@ lfc_disable(char const *op)
|
||||
|
||||
if (lfc_desc > 0)
|
||||
{
|
||||
int rc;
|
||||
|
||||
/*
|
||||
* If the reason of error is ENOSPC, then truncation of file may
|
||||
* help to reclaim some space
|
||||
*/
|
||||
pgstat_report_wait_start(WAIT_EVENT_NEON_LFC_TRUNCATE);
|
||||
rc = ftruncate(lfc_desc, 0);
|
||||
int rc = ftruncate(lfc_desc, 0);
|
||||
pgstat_report_wait_end();
|
||||
|
||||
if (rc < 0)
|
||||
@@ -618,7 +616,7 @@ lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
|
||||
*/
|
||||
if (entry->bitmap[chunk_offs >> 5] == 0)
|
||||
{
|
||||
bool has_remaining_pages = false;
|
||||
bool has_remaining_pages;
|
||||
|
||||
for (int i = 0; i < CHUNK_BITMAP_SIZE; i++)
|
||||
{
|
||||
@@ -668,6 +666,7 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
BufferTag tag;
|
||||
FileCacheEntry *entry;
|
||||
ssize_t rc;
|
||||
bool result = true;
|
||||
uint32 hash;
|
||||
uint64 generation;
|
||||
uint32 entry_offset;
|
||||
@@ -926,10 +925,10 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
/* We can reuse a hole that was left behind when the LFC was shrunk previously */
|
||||
FileCacheEntry *hole = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->holes));
|
||||
uint32 offset = hole->offset;
|
||||
bool hole_found;
|
||||
bool found;
|
||||
|
||||
hash_search_with_hash_value(lfc_hash, &hole->key, hole->hash, HASH_REMOVE, &hole_found);
|
||||
CriticalAssert(hole_found);
|
||||
hash_search_with_hash_value(lfc_hash, &hole->key, hole->hash, HASH_REMOVE, &found);
|
||||
CriticalAssert(found);
|
||||
|
||||
lfc_ctl->used += 1;
|
||||
entry->offset = offset; /* reuse the hole */
|
||||
@@ -1005,7 +1004,7 @@ neon_get_lfc_stats(PG_FUNCTION_ARGS)
|
||||
Datum result;
|
||||
HeapTuple tuple;
|
||||
char const *key;
|
||||
uint64 value = 0;
|
||||
uint64 value;
|
||||
Datum values[NUM_NEON_GET_STATS_COLS];
|
||||
bool nulls[NUM_NEON_GET_STATS_COLS];
|
||||
|
||||
|
||||
@@ -116,6 +116,8 @@ addSHLL(HyperLogLogState *cState, uint32 hash)
|
||||
{
|
||||
uint8 count;
|
||||
uint32 index;
|
||||
size_t i;
|
||||
size_t j;
|
||||
|
||||
TimestampTz now = GetCurrentTimestamp();
|
||||
/* Use the first "k" (registerWidth) bits as a zero based index */
|
||||
|
||||
@@ -89,6 +89,7 @@ typedef struct
|
||||
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
static shmem_request_hook_type prev_shmem_request_hook = NULL;
|
||||
static void walproposer_shmem_request(void);
|
||||
#endif
|
||||
static shmem_startup_hook_type prev_shmem_startup_hook;
|
||||
static PagestoreShmemState *pagestore_shared;
|
||||
@@ -440,8 +441,8 @@ pageserver_connect(shardno_t shard_no, int elevel)
|
||||
return false;
|
||||
}
|
||||
shard->state = PS_Connecting_Startup;
|
||||
/* fallthrough */
|
||||
}
|
||||
/* FALLTHROUGH */
|
||||
case PS_Connecting_Startup:
|
||||
{
|
||||
char *pagestream_query;
|
||||
@@ -452,6 +453,8 @@ pageserver_connect(shardno_t shard_no, int elevel)
|
||||
|
||||
do
|
||||
{
|
||||
WaitEvent event;
|
||||
|
||||
switch (poll_result)
|
||||
{
|
||||
default: /* unknown/unused states are handled as a failed connection */
|
||||
@@ -582,8 +585,8 @@ pageserver_connect(shardno_t shard_no, int elevel)
|
||||
}
|
||||
|
||||
shard->state = PS_Connecting_PageStream;
|
||||
/* fallthrough */
|
||||
}
|
||||
/* FALLTHROUGH */
|
||||
case PS_Connecting_PageStream:
|
||||
{
|
||||
neon_shard_log(shard_no, DEBUG5, "Connection state: Connecting_PageStream");
|
||||
@@ -628,8 +631,8 @@ pageserver_connect(shardno_t shard_no, int elevel)
|
||||
}
|
||||
|
||||
shard->state = PS_Connected;
|
||||
/* fallthrough */
|
||||
}
|
||||
/* FALLTHROUGH */
|
||||
case PS_Connected:
|
||||
/*
|
||||
* We successfully connected. Future connections to this PageServer
|
||||
|
||||
@@ -94,6 +94,7 @@ neon_perf_counters_to_metrics(neon_per_backend_counters *counters)
|
||||
metric_t *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t));
|
||||
uint64 bucket_accum;
|
||||
int i = 0;
|
||||
Datum getpage_wait_str;
|
||||
|
||||
metrics[i].name = "getpage_wait_seconds_count";
|
||||
metrics[i].is_bucket = false;
|
||||
@@ -223,6 +224,7 @@ neon_get_perf_counters(PG_FUNCTION_ARGS)
|
||||
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
|
||||
Datum values[3];
|
||||
bool nulls[3];
|
||||
Datum getpage_wait_str;
|
||||
neon_per_backend_counters totals = {0};
|
||||
metric_t *metrics;
|
||||
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
#define NEON_PGVERSIONCOMPAT_H
|
||||
|
||||
#include "fmgr.h"
|
||||
#include "storage/buf_internals.h"
|
||||
|
||||
#if PG_MAJORVERSION_NUM < 17
|
||||
#define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != InvalidBackendId)
|
||||
@@ -21,24 +20,11 @@
|
||||
NInfoGetRelNumber(a) == NInfoGetRelNumber(b) \
|
||||
)
|
||||
|
||||
/* These macros were turned into static inline functions in v16 */
|
||||
/* buftag population & RelFileNode/RelFileLocator rework */
|
||||
#if PG_MAJORVERSION_NUM < 16
|
||||
static inline bool
|
||||
BufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2)
|
||||
{
|
||||
return BUFFERTAGS_EQUAL(*tag1, *tag2);
|
||||
}
|
||||
|
||||
static inline void
|
||||
InitBufferTag(BufferTag *tag, const RelFileNode *rnode,
|
||||
ForkNumber forkNum, BlockNumber blockNum)
|
||||
{
|
||||
INIT_BUFFERTAG(*tag, *rnode, forkNum, blockNum);
|
||||
}
|
||||
#endif
|
||||
#define InitBufferTag(tag, rfn, fn, bn) INIT_BUFFERTAG(*tag, *rfn, fn, bn)
|
||||
|
||||
/* RelFileNode -> RelFileLocator rework */
|
||||
#if PG_MAJORVERSION_NUM < 16
|
||||
#define USE_RELFILENODE
|
||||
|
||||
#define RELFILEINFO_HDR "storage/relfilenode.h"
|
||||
@@ -87,6 +73,8 @@ InitBufferTag(BufferTag *tag, const RelFileNode *rnode,
|
||||
|
||||
#define USE_RELFILELOCATOR
|
||||
|
||||
#define BUFFERTAGS_EQUAL(a, b) BufferTagsEqual(&(a), &(b))
|
||||
|
||||
#define RELFILEINFO_HDR "storage/relfilelocator.h"
|
||||
|
||||
#define NRelFileInfo RelFileLocator
|
||||
|
||||
@@ -213,6 +213,32 @@ extern const f_smgr *smgr_neon(ProcNumber backend, NRelFileInfo rinfo);
|
||||
extern void smgr_init_neon(void);
|
||||
extern void readahead_buffer_resize(int newsize, void *extra);
|
||||
|
||||
/* Neon storage manager functionality */
|
||||
|
||||
extern void neon_init(void);
|
||||
extern void neon_open(SMgrRelation reln);
|
||||
extern void neon_close(SMgrRelation reln, ForkNumber forknum);
|
||||
extern void neon_create(SMgrRelation reln, ForkNumber forknum, bool isRedo);
|
||||
extern bool neon_exists(SMgrRelation reln, ForkNumber forknum);
|
||||
extern void neon_unlink(NRelFileInfoBackend rnode, ForkNumber forknum, bool isRedo);
|
||||
#if PG_MAJORVERSION_NUM < 16
|
||||
extern void neon_extend(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, char *buffer, bool skipFsync);
|
||||
#else
|
||||
extern void neon_extend(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, const void *buffer, bool skipFsync);
|
||||
extern void neon_zeroextend(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, int nbuffers, bool skipFsync);
|
||||
#endif
|
||||
|
||||
#if PG_MAJORVERSION_NUM >=17
|
||||
extern bool neon_prefetch(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, int nblocks);
|
||||
#else
|
||||
extern bool neon_prefetch(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* LSN values associated with each request to the pageserver
|
||||
*/
|
||||
@@ -252,7 +278,13 @@ extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum,
|
||||
extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
neon_request_lsns request_lsns, void *buffer);
|
||||
#endif
|
||||
extern void neon_writeback(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, BlockNumber nblocks);
|
||||
extern BlockNumber neon_nblocks(SMgrRelation reln, ForkNumber forknum);
|
||||
extern int64 neon_dbsize(Oid dbNode);
|
||||
extern void neon_truncate(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber nblocks);
|
||||
extern void neon_immedsync(SMgrRelation reln, ForkNumber forknum);
|
||||
|
||||
/* utils for neon relsize cache */
|
||||
extern void relsize_hash_init(void);
|
||||
|
||||
@@ -118,8 +118,6 @@ static UnloggedBuildPhase unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;
|
||||
static bool neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id);
|
||||
static bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id) = NULL;
|
||||
|
||||
static BlockNumber neon_nblocks(SMgrRelation reln, ForkNumber forknum);
|
||||
|
||||
/*
|
||||
* Prefetch implementation:
|
||||
*
|
||||
@@ -217,7 +215,7 @@ typedef struct PrfHashEntry
|
||||
sizeof(BufferTag) \
|
||||
)
|
||||
|
||||
#define SH_EQUAL(tb, a, b) (BufferTagsEqual(&(a)->buftag, &(b)->buftag))
|
||||
#define SH_EQUAL(tb, a, b) (BUFFERTAGS_EQUAL((a)->buftag, (b)->buftag))
|
||||
#define SH_SCOPE static inline
|
||||
#define SH_DEFINE
|
||||
#define SH_DECLARE
|
||||
@@ -738,7 +736,7 @@ static void
|
||||
prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns)
|
||||
{
|
||||
bool found;
|
||||
uint64 mySlotNo PG_USED_FOR_ASSERTS_ONLY = slot->my_ring_index;
|
||||
uint64 mySlotNo = slot->my_ring_index;
|
||||
|
||||
NeonGetPageRequest request = {
|
||||
.req.tag = T_NeonGetPageRequest,
|
||||
@@ -805,19 +803,15 @@ prefetch_register_bufferv(BufferTag tag, neon_request_lsns *frlsns,
|
||||
bool is_prefetch)
|
||||
{
|
||||
uint64 min_ring_index;
|
||||
PrefetchRequest hashkey;
|
||||
PrefetchRequest req;
|
||||
#if USE_ASSERT_CHECKING
|
||||
bool any_hits = false;
|
||||
#endif
|
||||
/* We will never read further ahead than our buffer can store. */
|
||||
nblocks = Max(1, Min(nblocks, readahead_buffer_size));
|
||||
|
||||
/*
|
||||
* Use an intermediate PrefetchRequest struct as the hash key to ensure
|
||||
* correct alignment and that the padding bytes are cleared.
|
||||
*/
|
||||
memset(&hashkey.buftag, 0, sizeof(BufferTag));
|
||||
hashkey.buftag = tag;
|
||||
/* use an intermediate PrefetchRequest struct to ensure correct alignment */
|
||||
req.buftag = tag;
|
||||
|
||||
Retry:
|
||||
min_ring_index = UINT64_MAX;
|
||||
@@ -843,8 +837,8 @@ Retry:
|
||||
slot = NULL;
|
||||
entry = NULL;
|
||||
|
||||
hashkey.buftag.blockNum = tag.blockNum + i;
|
||||
entry = prfh_lookup(MyPState->prf_hash, &hashkey);
|
||||
req.buftag.blockNum = tag.blockNum + i;
|
||||
entry = prfh_lookup(MyPState->prf_hash, (PrefetchRequest *) &req);
|
||||
|
||||
if (entry != NULL)
|
||||
{
|
||||
@@ -855,7 +849,7 @@ Retry:
|
||||
Assert(slot->status != PRFS_UNUSED);
|
||||
Assert(MyPState->ring_last <= ring_index &&
|
||||
ring_index < MyPState->ring_unused);
|
||||
Assert(BufferTagsEqual(&slot->buftag, &hashkey.buftag));
|
||||
Assert(BUFFERTAGS_EQUAL(slot->buftag, req.buftag));
|
||||
|
||||
/*
|
||||
* If the caller specified a request LSN to use, only accept
|
||||
@@ -892,19 +886,12 @@ Retry:
|
||||
{
|
||||
min_ring_index = Min(min_ring_index, ring_index);
|
||||
/* The buffered request is good enough, return that index */
|
||||
if (is_prefetch)
|
||||
pgBufferUsage.prefetch.duplicates++;
|
||||
else
|
||||
pgBufferUsage.prefetch.hits++;
|
||||
pgBufferUsage.prefetch.duplicates++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (!is_prefetch)
|
||||
{
|
||||
pgBufferUsage.prefetch.misses += 1;
|
||||
MyNeonCounters->getpage_prefetch_misses_total++;
|
||||
}
|
||||
|
||||
/*
|
||||
* We can only leave the block above by finding that there's
|
||||
* no entry that can satisfy this request, either because there
|
||||
@@ -987,7 +974,7 @@ Retry:
|
||||
* We must update the slot data before insertion, because the hash
|
||||
* function reads the buffer tag from the slot.
|
||||
*/
|
||||
slot->buftag = hashkey.buftag;
|
||||
slot->buftag = req.buftag;
|
||||
slot->shard_no = get_shard_number(&tag);
|
||||
slot->my_ring_index = ring_index;
|
||||
|
||||
@@ -1465,6 +1452,7 @@ log_newpages_copy(NRelFileInfo * rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
BlockNumber blknos[XLR_MAX_BLOCK_ID];
|
||||
Page pageptrs[XLR_MAX_BLOCK_ID];
|
||||
int nregistered = 0;
|
||||
XLogRecPtr result = 0;
|
||||
|
||||
for (int i = 0; i < nblocks; i++)
|
||||
{
|
||||
@@ -1777,7 +1765,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
|
||||
/*
|
||||
* neon_init() -- Initialize private state
|
||||
*/
|
||||
static void
|
||||
void
|
||||
neon_init(void)
|
||||
{
|
||||
Size prfs_size;
|
||||
@@ -2167,7 +2155,7 @@ neon_prefetch_response_usable(neon_request_lsns *request_lsns,
|
||||
/*
|
||||
* neon_exists() -- Does the physical file exist?
|
||||
*/
|
||||
static bool
|
||||
bool
|
||||
neon_exists(SMgrRelation reln, ForkNumber forkNum)
|
||||
{
|
||||
bool exists;
|
||||
@@ -2273,7 +2261,7 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
|
||||
*
|
||||
* If isRedo is true, it's okay for the relation to exist already.
|
||||
*/
|
||||
static void
|
||||
void
|
||||
neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
|
||||
{
|
||||
switch (reln->smgr_relpersistence)
|
||||
@@ -2349,7 +2337,7 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
|
||||
* Note: any failure should be reported as WARNING not ERROR, because
|
||||
* we are usually not in a transaction anymore when this is called.
|
||||
*/
|
||||
static void
|
||||
void
|
||||
neon_unlink(NRelFileInfoBackend rinfo, ForkNumber forkNum, bool isRedo)
|
||||
{
|
||||
/*
|
||||
@@ -2373,7 +2361,7 @@ neon_unlink(NRelFileInfoBackend rinfo, ForkNumber forkNum, bool isRedo)
|
||||
* EOF). Note that we assume writing a block beyond current EOF
|
||||
* causes intervening file space to become filled with zeroes.
|
||||
*/
|
||||
static void
|
||||
void
|
||||
#if PG_MAJORVERSION_NUM < 16
|
||||
neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
|
||||
char *buffer, bool skipFsync)
|
||||
@@ -2465,7 +2453,7 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
|
||||
}
|
||||
|
||||
#if PG_MAJORVERSION_NUM >= 16
|
||||
static void
|
||||
void
|
||||
neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
|
||||
int nblocks, bool skipFsync)
|
||||
{
|
||||
@@ -2561,7 +2549,7 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
|
||||
/*
|
||||
* neon_open() -- Initialize newly-opened relation.
|
||||
*/
|
||||
static void
|
||||
void
|
||||
neon_open(SMgrRelation reln)
|
||||
{
|
||||
/*
|
||||
@@ -2579,7 +2567,7 @@ neon_open(SMgrRelation reln)
|
||||
/*
|
||||
* neon_close() -- Close the specified relation, if it isn't closed already.
|
||||
*/
|
||||
static void
|
||||
void
|
||||
neon_close(SMgrRelation reln, ForkNumber forknum)
|
||||
{
|
||||
/*
|
||||
@@ -2594,12 +2582,13 @@ neon_close(SMgrRelation reln, ForkNumber forknum)
|
||||
/*
|
||||
* neon_prefetch() -- Initiate asynchronous read of the specified block of a relation
|
||||
*/
|
||||
static bool
|
||||
bool
|
||||
neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
int nblocks)
|
||||
{
|
||||
uint64 ring_index PG_USED_FOR_ASSERTS_ONLY;
|
||||
BufferTag tag;
|
||||
bool io_initiated = false;
|
||||
|
||||
switch (reln->smgr_relpersistence)
|
||||
{
|
||||
@@ -2623,6 +2612,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
while (nblocks > 0)
|
||||
{
|
||||
int iterblocks = Min(nblocks, PG_IOV_MAX);
|
||||
int seqlen = 0;
|
||||
bits8 lfc_present[PG_IOV_MAX / 8];
|
||||
memset(lfc_present, 0, sizeof(lfc_present));
|
||||
|
||||
@@ -2634,6 +2624,8 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
continue;
|
||||
}
|
||||
|
||||
io_initiated = true;
|
||||
|
||||
tag.blockNum = blocknum;
|
||||
|
||||
for (int i = 0; i < PG_IOV_MAX / 8; i++)
|
||||
@@ -2656,7 +2648,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
/*
|
||||
* neon_prefetch() -- Initiate asynchronous read of the specified block of a relation
|
||||
*/
|
||||
static bool
|
||||
bool
|
||||
neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
|
||||
{
|
||||
uint64 ring_index PG_USED_FOR_ASSERTS_ONLY;
|
||||
@@ -2700,7 +2692,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
|
||||
* This accepts a range of blocks because flushing several pages at once is
* considerably more efficient than doing so individually.
*/
static void
void
neon_writeback(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, BlockNumber nblocks)
{
@@ -2750,19 +2742,14 @@ neon_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber base_block
uint64 ring_index;
PrfHashEntry *entry;
PrefetchRequest *slot;
PrefetchRequest hashkey;
BufferTag buftag = {0};

Assert(PointerIsValid(request_lsns));
Assert(nblocks >= 1);

/*
* Use an intermediate PrefetchRequest struct as the hash key to ensure
* correct alignment and that the padding bytes are cleared.
*/
memset(&hashkey.buftag, 0, sizeof(BufferTag));
CopyNRelFileInfoToBufTag(hashkey.buftag, rinfo);
hashkey.buftag.forkNum = forkNum;
hashkey.buftag.blockNum = base_blockno;
CopyNRelFileInfoToBufTag(buftag, rinfo);
buftag.forkNum = forkNum;
buftag.blockNum = base_blockno;

/*
* The redo process does not lock pages that it needs to replay but are
@@ -2780,7 +2767,7 @@ neon_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber base_block
* weren't for the behaviour of the LwLsn cache that uses the highest
* value of the LwLsn cache when the entry is not found.
*/
prefetch_register_bufferv(hashkey.buftag, request_lsns, nblocks, mask, false);
prefetch_register_bufferv(buftag, request_lsns, nblocks, mask, false);

for (int i = 0; i < nblocks; i++)
{
@@ -2801,8 +2788,8 @@ neon_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber base_block
* Try to find prefetched page in the list of received pages.
*/
Retry:
hashkey.buftag.blockNum = blockno;
entry = prfh_lookup(MyPState->prf_hash, &hashkey);
buftag.blockNum = blockno;
entry = prfh_lookup(MyPState->prf_hash, (PrefetchRequest *) &buftag);

if (entry != NULL)
{
@@ -2810,6 +2797,7 @@ Retry:
if (neon_prefetch_response_usable(reqlsns, slot))
{
ring_index = slot->my_ring_index;
pgBufferUsage.prefetch.hits += 1;
}
else
{
@@ -2839,7 +2827,10 @@ Retry:
{
if (entry == NULL)
{
ring_index = prefetch_register_bufferv(hashkey.buftag, reqlsns, 1, NULL, false);
pgBufferUsage.prefetch.misses += 1;
MyNeonCounters->getpage_prefetch_misses_total++;

ring_index = prefetch_register_bufferv(buftag, reqlsns, 1, NULL, false);
Assert(ring_index != UINT64_MAX);
slot = GetPrfSlot(ring_index);
}
@@ -2864,8 +2855,8 @@ Retry:
} while (!prefetch_wait_for(ring_index));

Assert(slot->status == PRFS_RECEIVED);
Assert(memcmp(&hashkey.buftag, &slot->buftag, sizeof(BufferTag)) == 0);
Assert(hashkey.buftag.blockNum == base_blockno + i);
Assert(memcmp(&buftag, &slot->buftag, sizeof(BufferTag)) == 0);
Assert(buftag.blockNum == base_blockno + i);

resp = slot->response;

@@ -2921,10 +2912,10 @@ neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
* neon_read() -- Read the specified block from a relation.
*/
#if PG_MAJORVERSION_NUM < 16
static void
void
neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, char *buffer)
#else
static void
void
neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer)
#endif
{
@@ -3033,7 +3024,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
#endif /* PG_MAJORVERSION_NUM <= 16 */

#if PG_MAJORVERSION_NUM >= 17
static void
void
neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
void **buffers, BlockNumber nblocks)
{
@@ -3068,9 +3059,6 @@ neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
lfc_result = lfc_readv_select(InfoFromSMgrRel(reln), forknum, blocknum, buffers,
nblocks, read);

if (lfc_result > 0)
MyNeonCounters->file_cache_hits_total += lfc_result;

/* Read all blocks from LFC, so we're done */
if (lfc_result == nblocks)
return;
@@ -3197,7 +3185,6 @@ hexdump_page(char *page)
}
#endif

#if PG_MAJORVERSION_NUM < 17
/*
* neon_write() -- Write the supplied block at the appropriate location.
*
@@ -3205,7 +3192,7 @@ hexdump_page(char *page)
* relation (ie, those before the current EOF). To extend a relation,
* use mdextend().
*/
static void
void
#if PG_MAJORVERSION_NUM < 16
neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
#else
@@ -3271,12 +3258,11 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const vo
#endif
#endif
}
#endif



#if PG_MAJORVERSION_NUM >= 17
static void
void
neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
const void **buffers, BlockNumber nblocks, bool skipFsync)
{
@@ -3326,7 +3312,7 @@ neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
/*
* neon_nblocks() -- Get the number of blocks stored in a relation.
*/
static BlockNumber
BlockNumber
neon_nblocks(SMgrRelation reln, ForkNumber forknum)
{
NeonResponse *resp;
@@ -3463,7 +3449,7 @@ neon_dbsize(Oid dbNode)
/*
* neon_truncate() -- Truncate relation to specified number of blocks.
*/
static void
void
neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
{
XLogRecPtr lsn;
@@ -3532,7 +3518,7 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
* crash before the next checkpoint syncs the newly-inactive segment, that
* segment may survive recovery, reintroducing unwanted data into the table.
*/
static void
void
neon_immedsync(SMgrRelation reln, ForkNumber forknum)
{
switch (reln->smgr_relpersistence)
@@ -3562,8 +3548,8 @@ neon_immedsync(SMgrRelation reln, ForkNumber forknum)
}

#if PG_MAJORVERSION_NUM >= 17
static void
neon_registersync(SMgrRelation reln, ForkNumber forknum)
void
neon_regisersync(SMgrRelation reln, ForkNumber forknum)
{
switch (reln->smgr_relpersistence)
{
@@ -3747,8 +3733,6 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf
SlruKind kind;
int n_blocks;
shardno_t shard_no = 0; /* All SLRUs are at shard 0 */
NeonResponse *resp;
NeonGetSlruSegmentRequest request;

/*
* Compute a request LSN to use, similar to neon_get_request_lsns() but the
@@ -3787,7 +3771,8 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf
else
return -1;

request = (NeonGetSlruSegmentRequest) {
NeonResponse *resp;
NeonGetSlruSegmentRequest request = {
.req.tag = T_NeonGetSlruSegmentRequest,
.req.lsn = request_lsn,
.req.not_modified_since = not_modified_since,
@@ -3894,7 +3879,7 @@ static const struct f_smgr neon_smgr =
.smgr_truncate = neon_truncate,
.smgr_immedsync = neon_immedsync,
#if PG_MAJORVERSION_NUM >= 17
.smgr_registersync = neon_registersync,
.smgr_registersync = neon_regisersync,
#endif
.smgr_start_unlogged_build = neon_start_unlogged_build,
.smgr_finish_unlogged_build_phase_1 = neon_finish_unlogged_build_phase_1,

@@ -252,6 +252,8 @@ WalProposerPoll(WalProposer *wp)
|
||||
/* timeout expired: poll state */
|
||||
if (rc == 0 || TimeToReconnect(wp, now) <= 0)
|
||||
{
|
||||
TimestampTz now;
|
||||
|
||||
/*
|
||||
* If no WAL was generated during timeout (and we have already
|
||||
* collected the quorum), then send empty keepalive message
|
||||
@@ -267,7 +269,8 @@ WalProposerPoll(WalProposer *wp)
|
||||
now = wp->api.get_current_timestamp(wp);
|
||||
for (int i = 0; i < wp->n_safekeepers; i++)
|
||||
{
|
||||
sk = &wp->safekeeper[i];
|
||||
Safekeeper *sk = &wp->safekeeper[i];
|
||||
|
||||
if (TimestampDifferenceExceeds(sk->latestMsgReceivedAt, now,
|
||||
wp->config->safekeeper_connection_timeout))
|
||||
{
|
||||
@@ -1077,7 +1080,7 @@ SendProposerElected(Safekeeper *sk)
|
||||
ProposerElected msg;
|
||||
TermHistory *th;
|
||||
term_t lastCommonTerm;
|
||||
int idx;
|
||||
int i;
|
||||
|
||||
/* Now that we are ready to send it's a good moment to create WAL reader */
|
||||
wp->api.wal_reader_allocate(sk);
|
||||
@@ -1096,15 +1099,15 @@ SendProposerElected(Safekeeper *sk)
|
||||
/* We must start somewhere. */
|
||||
Assert(wp->propTermHistory.n_entries >= 1);
|
||||
|
||||
for (idx = 0; idx < Min(wp->propTermHistory.n_entries, th->n_entries); idx++)
|
||||
for (i = 0; i < Min(wp->propTermHistory.n_entries, th->n_entries); i++)
|
||||
{
|
||||
if (wp->propTermHistory.entries[idx].term != th->entries[idx].term)
|
||||
if (wp->propTermHistory.entries[i].term != th->entries[i].term)
|
||||
break;
|
||||
/* term must begin everywhere at the same point */
|
||||
Assert(wp->propTermHistory.entries[idx].lsn == th->entries[idx].lsn);
|
||||
Assert(wp->propTermHistory.entries[i].lsn == th->entries[i].lsn);
|
||||
}
|
||||
idx--; /* step back to the last common term */
|
||||
if (idx < 0)
|
||||
i--; /* step back to the last common term */
|
||||
if (i < 0)
|
||||
{
|
||||
/* safekeeper is empty or no common point, start from the beginning */
|
||||
sk->startStreamingAt = wp->propTermHistory.entries[0].lsn;
|
||||
@@ -1125,14 +1128,14 @@ SendProposerElected(Safekeeper *sk)
|
||||
* proposer, LSN it is currently writing, but then we just pick
|
||||
* safekeeper pos as it obviously can't be higher.
|
||||
*/
|
||||
if (wp->propTermHistory.entries[idx].term == wp->propTerm)
|
||||
if (wp->propTermHistory.entries[i].term == wp->propTerm)
|
||||
{
|
||||
sk->startStreamingAt = sk->voteResponse.flushLsn;
|
||||
}
|
||||
else
|
||||
{
|
||||
XLogRecPtr propEndLsn = wp->propTermHistory.entries[idx + 1].lsn;
|
||||
XLogRecPtr skEndLsn = (idx + 1 < th->n_entries ? th->entries[idx + 1].lsn : sk->voteResponse.flushLsn);
|
||||
XLogRecPtr propEndLsn = wp->propTermHistory.entries[i + 1].lsn;
|
||||
XLogRecPtr skEndLsn = (i + 1 < th->n_entries ? th->entries[i + 1].lsn : sk->voteResponse.flushLsn);
|
||||
|
||||
sk->startStreamingAt = Min(propEndLsn, skEndLsn);
|
||||
}
|
||||
@@ -1146,7 +1149,7 @@ SendProposerElected(Safekeeper *sk)
|
||||
msg.termHistory = &wp->propTermHistory;
|
||||
msg.timelineStartLsn = wp->timelineStartLsn;
|
||||
|
||||
lastCommonTerm = idx >= 0 ? wp->propTermHistory.entries[idx].term : 0;
|
||||
lastCommonTerm = i >= 0 ? wp->propTermHistory.entries[i].term : 0;
|
||||
wp_log(LOG,
|
||||
"sending elected msg to node " UINT64_FORMAT " term=" UINT64_FORMAT ", startStreamingAt=%X/%X (lastCommonTerm=" UINT64_FORMAT "), termHistory.n_entries=%u to %s:%s, timelineStartLsn=%X/%X",
|
||||
sk->greetResponse.nodeId, msg.term, LSN_FORMAT_ARGS(msg.startStreamingAt), lastCommonTerm, msg.termHistory->n_entries, sk->host, sk->port, LSN_FORMAT_ARGS(msg.timelineStartLsn));
|
||||
@@ -1638,7 +1641,7 @@ UpdateDonorShmem(WalProposer *wp)
|
||||
* Process AppendResponse message from safekeeper.
|
||||
*/
|
||||
static void
|
||||
HandleSafekeeperResponse(WalProposer *wp, Safekeeper *fromsk)
|
||||
HandleSafekeeperResponse(WalProposer *wp, Safekeeper *sk)
|
||||
{
|
||||
XLogRecPtr candidateTruncateLsn;
|
||||
XLogRecPtr newCommitLsn;
|
||||
@@ -1657,7 +1660,7 @@ HandleSafekeeperResponse(WalProposer *wp, Safekeeper *fromsk)
|
||||
* and WAL is committed by the quorum. BroadcastAppendRequest() should be
|
||||
* called to notify safekeepers about the new commitLsn.
|
||||
*/
|
||||
wp->api.process_safekeeper_feedback(wp, fromsk);
|
||||
wp->api.process_safekeeper_feedback(wp, sk);
|
||||
|
||||
/*
|
||||
* Try to advance truncateLsn -- the last record flushed to all
|
||||
|
||||
@@ -725,7 +725,7 @@ extern void WalProposerBroadcast(WalProposer *wp, XLogRecPtr startpos, XLogRecPt
|
||||
extern void WalProposerPoll(WalProposer *wp);
|
||||
extern void WalProposerFree(WalProposer *wp);
|
||||
|
||||
extern WalproposerShmemState *GetWalpropShmemState(void);
|
||||
extern WalproposerShmemState *GetWalpropShmemState();
|
||||
|
||||
/*
|
||||
* WaitEventSet API doesn't allow to remove socket, so walproposer_pg uses it to
|
||||
@@ -745,7 +745,7 @@ extern TimeLineID walprop_pg_get_timeline_id(void);
|
||||
* catch logging.
|
||||
*/
|
||||
#ifdef WALPROPOSER_LIB
|
||||
extern void WalProposerLibLog(WalProposer *wp, int elevel, char *fmt,...) pg_attribute_printf(3, 4);
|
||||
extern void WalProposerLibLog(WalProposer *wp, int elevel, char *fmt,...);
|
||||
#define wp_log(elevel, fmt, ...) WalProposerLibLog(wp, elevel, fmt, ## __VA_ARGS__)
|
||||
#else
|
||||
#define wp_log(elevel, fmt, ...) elog(elevel, WP_LOG_PREFIX fmt, ## __VA_ARGS__)
|
||||
|
||||
@@ -286,9 +286,6 @@ safekeepers_cmp(char *old, char *new)
|
||||
static void
|
||||
assign_neon_safekeepers(const char *newval, void *extra)
|
||||
{
|
||||
char *newval_copy;
|
||||
char *oldval;
|
||||
|
||||
if (!am_walproposer)
|
||||
return;
|
||||
|
||||
@@ -298,8 +295,8 @@ assign_neon_safekeepers(const char *newval, void *extra)
|
||||
}
|
||||
|
||||
/* Copy values because we will modify them in split_safekeepers_list() */
|
||||
newval_copy = pstrdup(newval);
|
||||
oldval = pstrdup(wal_acceptors_list);
|
||||
char *newval_copy = pstrdup(newval);
|
||||
char *oldval = pstrdup(wal_acceptors_list);
|
||||
|
||||
/*
|
||||
* TODO: restarting through FATAL is stupid and introduces 1s delay before
|
||||
@@ -541,7 +538,7 @@ nwp_shmem_startup_hook(void)
|
||||
}
|
||||
|
||||
WalproposerShmemState *
|
||||
GetWalpropShmemState(void)
|
||||
GetWalpropShmemState()
|
||||
{
|
||||
Assert(walprop_shared != NULL);
|
||||
return walprop_shared;
|
||||
|
||||
@@ -191,14 +191,13 @@ NeonOnDemandXLogReaderRoutines(XLogReaderRoutine *xlr)
|
||||
|
||||
if (!wal_reader)
|
||||
{
|
||||
XLogRecPtr basebackupLsn = GetRedoStartLsn();
|
||||
XLogRecPtr epochStartLsn = pg_atomic_read_u64(&GetWalpropShmemState()->propEpochStartLsn);
|
||||
|
||||
/* should never happen */
|
||||
if (basebackupLsn == 0)
|
||||
if (epochStartLsn == 0)
|
||||
{
|
||||
elog(ERROR, "unable to start walsender when basebackupLsn is 0");
|
||||
elog(ERROR, "Unable to start walsender when propEpochStartLsn is 0!");
|
||||
}
|
||||
wal_reader = NeonWALReaderAllocate(wal_segment_size, basebackupLsn, "[walsender] ");
|
||||
wal_reader = NeonWALReaderAllocate(wal_segment_size, epochStartLsn, "[walsender] ");
|
||||
}
|
||||
xlr->page_read = NeonWALPageRead;
|
||||
xlr->segment_open = NeonWALReadSegmentOpen;
|
||||
|
||||
@@ -44,6 +44,27 @@ infobits_desc(StringInfo buf, uint8 infobits, const char *keyname)
|
||||
appendStringInfoString(buf, "]");
|
||||
}
|
||||
|
||||
static void
|
||||
truncate_flags_desc(StringInfo buf, uint8 flags)
|
||||
{
|
||||
appendStringInfoString(buf, "flags: [");
|
||||
|
||||
if (flags & XLH_TRUNCATE_CASCADE)
|
||||
appendStringInfoString(buf, "CASCADE, ");
|
||||
if (flags & XLH_TRUNCATE_RESTART_SEQS)
|
||||
appendStringInfoString(buf, "RESTART_SEQS, ");
|
||||
|
||||
if (buf->data[buf->len - 1] == ' ')
|
||||
{
|
||||
/* Truncate-away final unneeded ", " */
|
||||
Assert(buf->data[buf->len - 2] == ',');
|
||||
buf->len -= 2;
|
||||
buf->data[buf->len] = '\0';
|
||||
}
|
||||
|
||||
appendStringInfoString(buf, "]");
|
||||
}
|
||||
|
||||
void
|
||||
neon_rm_desc(StringInfo buf, XLogReaderState *record)
|
||||
{
|
||||
|
||||
@@ -136,7 +136,7 @@ static bool redo_block_filter(XLogReaderState *record, uint8 block_id);
|
||||
static void GetPage(StringInfo input_message);
|
||||
static void Ping(StringInfo input_message);
|
||||
static ssize_t buffered_read(void *buf, size_t count);
|
||||
static void CreateFakeSharedMemoryAndSemaphores(void);
|
||||
static void CreateFakeSharedMemoryAndSemaphores();
|
||||
|
||||
static BufferTag target_redo_tag;
|
||||
|
||||
@@ -170,40 +170,6 @@ close_range_syscall(unsigned int start_fd, unsigned int count, unsigned int flag
|
||||
return syscall(__NR_close_range, start_fd, count, flags);
|
||||
}
|
||||
|
||||
|
||||
static PgSeccompRule allowed_syscalls[] =
|
||||
{
|
||||
/* Hard requirements */
|
||||
PG_SCMP_ALLOW(exit_group),
|
||||
PG_SCMP_ALLOW(pselect6),
|
||||
PG_SCMP_ALLOW(read),
|
||||
PG_SCMP_ALLOW(select),
|
||||
PG_SCMP_ALLOW(write),
|
||||
|
||||
/* Memory allocation */
|
||||
PG_SCMP_ALLOW(brk),
|
||||
#ifndef MALLOC_NO_MMAP
|
||||
/* TODO: musl doesn't have mallopt */
|
||||
PG_SCMP_ALLOW(mmap),
|
||||
PG_SCMP_ALLOW(munmap),
|
||||
#endif
|
||||
/*
|
||||
* getpid() is called on assertion failure, in ExceptionalCondition.
|
||||
* It's not really needed, but seems pointless to hide it either. The
|
||||
* system call unlikely to expose a kernel vulnerability, and the PID
|
||||
* is stored in MyProcPid anyway.
|
||||
*/
|
||||
PG_SCMP_ALLOW(getpid),
|
||||
|
||||
/* Enable those for a proper shutdown. */
|
||||
#if 0
|
||||
PG_SCMP_ALLOW(munmap),
|
||||
PG_SCMP_ALLOW(shmctl),
|
||||
PG_SCMP_ALLOW(shmdt),
|
||||
PG_SCMP_ALLOW(unlink), /* shm_unlink */
|
||||
#endif
|
||||
};
|
||||
|
||||
static void
|
||||
enter_seccomp_mode(void)
|
||||
{
|
||||
@@ -217,12 +183,44 @@ enter_seccomp_mode(void)
|
||||
(errcode(ERRCODE_SYSTEM_ERROR),
|
||||
errmsg("seccomp: could not close files >= fd 3")));
|
||||
|
||||
PgSeccompRule syscalls[] =
|
||||
{
|
||||
/* Hard requirements */
|
||||
PG_SCMP_ALLOW(exit_group),
|
||||
PG_SCMP_ALLOW(pselect6),
|
||||
PG_SCMP_ALLOW(read),
|
||||
PG_SCMP_ALLOW(select),
|
||||
PG_SCMP_ALLOW(write),
|
||||
|
||||
/* Memory allocation */
|
||||
PG_SCMP_ALLOW(brk),
|
||||
#ifndef MALLOC_NO_MMAP
|
||||
/* TODO: musl doesn't have mallopt */
|
||||
PG_SCMP_ALLOW(mmap),
|
||||
PG_SCMP_ALLOW(munmap),
|
||||
#endif
|
||||
/*
|
||||
* getpid() is called on assertion failure, in ExceptionalCondition.
|
||||
* It's not really needed, but seems pointless to hide it either. The
|
||||
* system call unlikely to expose a kernel vulnerability, and the PID
|
||||
* is stored in MyProcPid anyway.
|
||||
*/
|
||||
PG_SCMP_ALLOW(getpid),
|
||||
|
||||
/* Enable those for a proper shutdown.
|
||||
PG_SCMP_ALLOW(munmap),
|
||||
PG_SCMP_ALLOW(shmctl),
|
||||
PG_SCMP_ALLOW(shmdt),
|
||||
PG_SCMP_ALLOW(unlink), // shm_unlink
|
||||
*/
|
||||
};
|
||||
|
||||
#ifdef MALLOC_NO_MMAP
|
||||
/* Ask glibc not to use mmap() */
|
||||
mallopt(M_MMAP_MAX, 0);
|
||||
#endif
|
||||
|
||||
seccomp_load_rules(allowed_syscalls, lengthof(allowed_syscalls));
|
||||
seccomp_load_rules(syscalls, lengthof(syscalls));
|
||||
}
|
||||
#endif /* HAVE_LIBSECCOMP */
|
||||
|
||||
@@ -451,7 +449,7 @@ WalRedoMain(int argc, char *argv[])
|
||||
* half-initialized postgres.
|
||||
*/
|
||||
static void
|
||||
CreateFakeSharedMemoryAndSemaphores(void)
|
||||
CreateFakeSharedMemoryAndSemaphores()
|
||||
{
|
||||
PGShmemHeader *shim = NULL;
|
||||
PGShmemHeader *hdr;
|
||||
@@ -994,7 +992,7 @@ redo_block_filter(XLogReaderState *record, uint8 block_id)
|
||||
* If this block isn't one we are currently restoring, then return 'true'
|
||||
* so that this gets ignored
|
||||
*/
|
||||
return !BufferTagsEqual(&target_tag, &target_redo_tag);
|
||||
return !BUFFERTAGS_EQUAL(target_tag, target_redo_tag);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
29 poetry.lock generated
@@ -2095,7 +2095,6 @@ files = [
|
||||
{file = "psycopg2_binary-2.9.9-cp311-cp311-win32.whl", hash = "sha256:dc4926288b2a3e9fd7b50dc6a1909a13bbdadfc67d93f3374d984e56f885579d"},
|
||||
{file = "psycopg2_binary-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:b76bedd166805480ab069612119ea636f5ab8f8771e640ae103e05a4aae3e417"},
|
||||
{file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf"},
|
||||
{file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d"},
|
||||
{file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212"},
|
||||
{file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493"},
|
||||
{file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996"},
|
||||
@@ -2104,8 +2103,6 @@ files = [
|
||||
{file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07"},
|
||||
{file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb"},
|
||||
{file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe"},
|
||||
{file = "psycopg2_binary-2.9.9-cp312-cp312-win32.whl", hash = "sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93"},
|
||||
{file = "psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab"},
|
||||
{file = "psycopg2_binary-2.9.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2293b001e319ab0d869d660a704942c9e2cce19745262a8aba2115ef41a0a42a"},
|
||||
{file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ef7df18daf2c4c07e2695e8cfd5ee7f748a1d54d802330985a78d2a5a6dca9"},
|
||||
{file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a602ea5aff39bb9fac6308e9c9d82b9a35c2bf288e184a816002c9fae930b77"},
|
||||
@@ -2587,7 +2584,6 @@ files = [
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
|
||||
@@ -2733,22 +2729,21 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
|
||||
|
||||
[[package]]
|
||||
name = "responses"
|
||||
version = "0.25.3"
|
||||
version = "0.21.0"
|
||||
description = "A utility library for mocking out the `requests` Python library."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "responses-0.25.3-py3-none-any.whl", hash = "sha256:521efcbc82081ab8daa588e08f7e8a64ce79b91c39f6e62199b19159bea7dbcb"},
|
||||
{file = "responses-0.25.3.tar.gz", hash = "sha256:617b9247abd9ae28313d57a75880422d55ec63c29d33d629697590a034358dba"},
|
||||
{file = "responses-0.21.0-py3-none-any.whl", hash = "sha256:2dcc863ba63963c0c3d9ee3fa9507cbe36b7d7b0fccb4f0bdfd9e96c539b1487"},
|
||||
{file = "responses-0.21.0.tar.gz", hash = "sha256:b82502eb5f09a0289d8e209e7bad71ef3978334f56d09b444253d5ad67bf5253"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
pyyaml = "*"
|
||||
requests = ">=2.30.0,<3.0"
|
||||
urllib3 = ">=1.25.10,<3.0"
|
||||
requests = ">=2.0,<3.0"
|
||||
urllib3 = ">=1.25.10"
|
||||
|
||||
[package.extras]
|
||||
tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "tomli", "tomli-w", "types-PyYAML", "types-requests"]
|
||||
tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asyncio", "pytest-cov", "pytest-localserver", "types-mock", "types-requests"]
|
||||
|
||||
[[package]]
|
||||
name = "rfc3339-validator"
|
||||
@@ -3142,16 +3137,6 @@ files = [
|
||||
{file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"},
|
||||
{file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"},
|
||||
{file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"},
|
||||
{file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"},
|
||||
{file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"},
|
||||
{file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"},
|
||||
{file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"},
|
||||
|
||||
@@ -1,12 +1,11 @@
#!/usr/bin/env python3

from __future__ import annotations

import argparse
import enum
import os
import subprocess
import sys
from typing import List


@enum.unique
@@ -56,12 +55,12 @@ def mypy() -> str:
return "poetry run mypy"


def get_commit_files() -> list[str]:
def get_commit_files() -> List[str]:
files = subprocess.check_output("git diff --cached --name-only --diff-filter=ACM".split())
return files.decode().splitlines()


def check(name: str, suffix: str, cmd: str, changed_files: list[str], no_color: bool = False):
def check(name: str, suffix: str, cmd: str, changed_files: List[str], no_color: bool = False):
print(f"Checking: {name} ", end="")
applicable_files = list(filter(lambda fname: fname.strip().endswith(suffix), changed_files))
if not applicable_files:

@@ -38,8 +38,8 @@ hostname.workspace = true
http.workspace = true
humantime.workspace = true
humantime-serde.workspace = true
hyper0.workspace = true
hyper = { workspace = true, features = ["server", "http1", "http2"] }
hyper.workspace = true
hyper1 = { package = "hyper", version = "1.2", features = ["server"] }
hyper-util = { version = "0.1", features = ["server", "http1", "http2", "tokio"] }
http-body-util = { version = "0.1" }
indexmap.workspace = true
@@ -77,7 +77,7 @@ subtle.workspace = true
thiserror.workspace = true
tikv-jemallocator.workspace = true
tikv-jemalloc-ctl = { workspace = true, features = ["use_std"] }
tokio-postgres = { workspace = true, features = ["with-serde_json-1"] }
tokio-postgres.workspace = true
tokio-postgres-rustls.workspace = true
tokio-rustls.workspace = true
tokio-util.workspace = true
@@ -101,7 +101,7 @@ jose-jwa = "0.1.2"
jose-jwk = { version = "0.1.2", features = ["p256", "p384", "rsa"] }
signature = "2"
ecdsa = "0.16"
p256 = { version = "0.13", features = ["jwk"] }
p256 = "0.13"
rsa = "0.9"

workspace_hack.workspace = true

@@ -18,7 +18,7 @@ pub(crate) use flow::*;
|
||||
use tokio::time::error::Elapsed;
|
||||
|
||||
use crate::{
|
||||
control_plane,
|
||||
console,
|
||||
error::{ReportableError, UserFacingError},
|
||||
};
|
||||
use std::{io, net::IpAddr};
|
||||
@@ -34,7 +34,7 @@ pub(crate) enum AuthErrorImpl {
|
||||
Web(#[from] backend::WebAuthError),
|
||||
|
||||
#[error(transparent)]
|
||||
GetAuthInfo(#[from] control_plane::errors::GetAuthInfoError),
|
||||
GetAuthInfo(#[from] console::errors::GetAuthInfoError),
|
||||
|
||||
/// SASL protocol errors (includes [SCRAM](crate::scram)).
|
||||
#[error(transparent)]
|
||||
@@ -1,27 +1,27 @@
|
||||
mod classic;
|
||||
mod console_redirect;
|
||||
mod hacks;
|
||||
pub mod jwt;
|
||||
pub mod local;
|
||||
mod web;
|
||||
|
||||
use std::net::IpAddr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
pub(crate) use console_redirect::WebAuthError;
|
||||
use ipnet::{Ipv4Net, Ipv6Net};
|
||||
use local::LocalBackend;
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tokio_postgres::config::AuthKeys;
|
||||
use tracing::{info, warn};
|
||||
pub(crate) use web::WebAuthError;
|
||||
|
||||
use crate::auth::credentials::check_peer_addr_is_in_list;
|
||||
use crate::auth::{validate_password_and_exchange, AuthError};
|
||||
use crate::cache::Cached;
|
||||
use crate::console::errors::GetAuthInfoError;
|
||||
use crate::console::provider::{CachedRoleSecret, ConsoleBackend};
|
||||
use crate::console::{AuthSecret, NodeInfo};
|
||||
use crate::context::RequestMonitoring;
|
||||
use crate::control_plane::errors::GetAuthInfoError;
|
||||
use crate::control_plane::provider::{CachedRoleSecret, ControlPlaneBackend};
|
||||
use crate::control_plane::{AuthSecret, NodeInfo};
|
||||
use crate::intern::EndpointIdInt;
|
||||
use crate::metrics::Metrics;
|
||||
use crate::proxy::connect_compute::ComputeConnectBackend;
|
||||
@@ -31,7 +31,7 @@ use crate::stream::Stream;
|
||||
use crate::{
|
||||
auth::{self, ComputeUserInfoMaybeEndpoint},
|
||||
config::AuthenticationConfig,
|
||||
control_plane::{
|
||||
console::{
|
||||
self,
|
||||
provider::{CachedAllowedIps, CachedNodeInfo},
|
||||
Api,
|
||||
@@ -67,19 +67,19 @@ impl<T> std::ops::Deref for MaybeOwned<'_, T> {
|
||||
/// backends which require them for the authentication process.
|
||||
pub enum Backend<'a, T, D> {
|
||||
/// Cloud API (V2).
|
||||
ControlPlane(MaybeOwned<'a, ControlPlaneBackend>, T),
|
||||
Console(MaybeOwned<'a, ConsoleBackend>, T),
|
||||
/// Authentication via a web browser.
|
||||
ConsoleRedirect(MaybeOwned<'a, url::ApiUrl>, D),
|
||||
Web(MaybeOwned<'a, url::ApiUrl>, D),
|
||||
/// Local proxy uses configured auth credentials and does not wake compute
|
||||
Local(MaybeOwned<'a, LocalBackend>),
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) trait TestBackend: Send + Sync + 'static {
|
||||
fn wake_compute(&self) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError>;
|
||||
fn wake_compute(&self) -> Result<CachedNodeInfo, console::errors::WakeComputeError>;
|
||||
fn get_allowed_ips_and_secret(
|
||||
&self,
|
||||
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), control_plane::errors::GetAuthInfoError>;
|
||||
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), console::errors::GetAuthInfoError>;
|
||||
fn dyn_clone(&self) -> Box<dyn TestBackend>;
|
||||
}
|
||||
|
||||
@@ -93,23 +93,18 @@ impl Clone for Box<dyn TestBackend> {
|
||||
impl std::fmt::Display for Backend<'_, (), ()> {
|
||||
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::ControlPlane(api, ()) => match &**api {
|
||||
ControlPlaneBackend::Management(endpoint) => fmt
|
||||
.debug_tuple("ControlPlane::Management")
|
||||
.field(&endpoint.url())
|
||||
.finish(),
|
||||
Self::Console(api, ()) => match &**api {
|
||||
ConsoleBackend::Console(endpoint) => {
|
||||
fmt.debug_tuple("Console").field(&endpoint.url()).finish()
|
||||
}
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
ControlPlaneBackend::PostgresMock(endpoint) => fmt
|
||||
.debug_tuple("ControlPlane::PostgresMock")
|
||||
.field(&endpoint.url())
|
||||
.finish(),
|
||||
ConsoleBackend::Postgres(endpoint) => {
|
||||
fmt.debug_tuple("Postgres").field(&endpoint.url()).finish()
|
||||
}
|
||||
#[cfg(test)]
|
||||
ControlPlaneBackend::Test(_) => fmt.debug_tuple("ControlPlane::Test").finish(),
|
||||
ConsoleBackend::Test(_) => fmt.debug_tuple("Test").finish(),
|
||||
},
|
||||
Self::ConsoleRedirect(url, ()) => fmt
|
||||
.debug_tuple("ConsoleRedirect")
|
||||
.field(&url.as_str())
|
||||
.finish(),
|
||||
Self::Web(url, ()) => fmt.debug_tuple("Web").field(&url.as_str()).finish(),
|
||||
Self::Local(_) => fmt.debug_tuple("Local").finish(),
|
||||
}
|
||||
}
|
||||
@@ -120,8 +115,8 @@ impl<T, D> Backend<'_, T, D> {
|
||||
/// This helps us pass structured config to async tasks.
|
||||
pub(crate) fn as_ref(&self) -> Backend<'_, &T, &D> {
|
||||
match self {
|
||||
Self::ControlPlane(c, x) => Backend::ControlPlane(MaybeOwned::Borrowed(c), x),
|
||||
Self::ConsoleRedirect(c, x) => Backend::ConsoleRedirect(MaybeOwned::Borrowed(c), x),
|
||||
Self::Console(c, x) => Backend::Console(MaybeOwned::Borrowed(c), x),
|
||||
Self::Web(c, x) => Backend::Web(MaybeOwned::Borrowed(c), x),
|
||||
Self::Local(l) => Backend::Local(MaybeOwned::Borrowed(l)),
|
||||
}
|
||||
}
|
||||
@@ -133,8 +128,8 @@ impl<'a, T, D> Backend<'a, T, D> {
|
||||
/// a function to a contained value.
|
||||
pub(crate) fn map<R>(self, f: impl FnOnce(T) -> R) -> Backend<'a, R, D> {
|
||||
match self {
|
||||
Self::ControlPlane(c, x) => Backend::ControlPlane(c, f(x)),
|
||||
Self::ConsoleRedirect(c, x) => Backend::ConsoleRedirect(c, x),
|
||||
Self::Console(c, x) => Backend::Console(c, f(x)),
|
||||
Self::Web(c, x) => Backend::Web(c, x),
|
||||
Self::Local(l) => Backend::Local(l),
|
||||
}
|
||||
}
|
||||
@@ -144,8 +139,8 @@ impl<'a, T, D, E> Backend<'a, Result<T, E>, D> {
|
||||
/// This is most useful for error handling.
|
||||
pub(crate) fn transpose(self) -> Result<Backend<'a, T, D>, E> {
|
||||
match self {
|
||||
Self::ControlPlane(c, x) => x.map(|x| Backend::ControlPlane(c, x)),
|
||||
Self::ConsoleRedirect(c, x) => Ok(Backend::ConsoleRedirect(c, x)),
|
||||
Self::Console(c, x) => x.map(|x| Backend::Console(c, x)),
|
||||
Self::Web(c, x) => Ok(Backend::Web(c, x)),
|
||||
Self::Local(l) => Ok(Backend::Local(l)),
|
||||
}
|
||||
}
|
||||
@@ -175,12 +170,10 @@ impl ComputeUserInfo {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg_attr(test, derive(Debug))]
|
||||
pub(crate) enum ComputeCredentialKeys {
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
Password(Vec<u8>),
|
||||
AuthKeys(AuthKeys),
|
||||
JwtPayload(Vec<u8>),
|
||||
None,
|
||||
}
|
||||
|
||||
@@ -297,7 +290,7 @@ impl AuthenticationConfig {
|
||||
/// All authentication flows will emit an AuthenticationOk message if successful.
|
||||
async fn auth_quirks(
|
||||
ctx: &RequestMonitoring,
|
||||
api: &impl control_plane::Api,
|
||||
api: &impl console::Api,
|
||||
user_info: ComputeUserInfoMaybeEndpoint,
|
||||
client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
|
||||
allow_cleartext: bool,
|
||||
@@ -419,8 +412,8 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint, &()> {
|
||||
/// Get username from the credentials.
|
||||
pub(crate) fn get_user(&self) -> &str {
|
||||
match self {
|
||||
Self::ControlPlane(_, user_info) => &user_info.user,
|
||||
Self::ConsoleRedirect(_, ()) => "web",
|
||||
Self::Console(_, user_info) => &user_info.user,
|
||||
Self::Web(_, ()) => "web",
|
||||
Self::Local(_) => "local",
|
||||
}
|
||||
}
|
||||
@@ -436,7 +429,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint, &()> {
|
||||
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
|
||||
) -> auth::Result<Backend<'a, ComputeCredentials, NodeInfo>> {
|
||||
let res = match self {
|
||||
Self::ControlPlane(api, user_info) => {
|
||||
Self::Console(api, user_info) => {
|
||||
info!(
|
||||
user = &*user_info.user,
|
||||
project = user_info.endpoint(),
|
||||
@@ -453,15 +446,15 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint, &()> {
|
||||
endpoint_rate_limiter,
|
||||
)
|
||||
.await?;
|
||||
Backend::ControlPlane(api, credentials)
|
||||
Backend::Console(api, credentials)
|
||||
}
|
||||
// NOTE: this auth backend doesn't use client credentials.
|
||||
Self::ConsoleRedirect(url, ()) => {
|
||||
Self::Web(url, ()) => {
|
||||
info!("performing web authentication");
|
||||
|
||||
let info = console_redirect::authenticate(ctx, config, &url, client).await?;
|
||||
let info = web::authenticate(ctx, config, &url, client).await?;
|
||||
|
||||
Backend::ConsoleRedirect(url, info)
|
||||
Backend::Web(url, info)
|
||||
}
|
||||
Self::Local(_) => {
|
||||
return Err(auth::AuthError::bad_auth_method("invalid for local proxy"))
|
||||
@@ -479,8 +472,8 @@ impl Backend<'_, ComputeUserInfo, &()> {
|
||||
ctx: &RequestMonitoring,
|
||||
) -> Result<CachedRoleSecret, GetAuthInfoError> {
|
||||
match self {
|
||||
Self::ControlPlane(api, user_info) => api.get_role_secret(ctx, user_info).await,
|
||||
Self::ConsoleRedirect(_, ()) => Ok(Cached::new_uncached(None)),
|
||||
Self::Console(api, user_info) => api.get_role_secret(ctx, user_info).await,
|
||||
Self::Web(_, ()) => Ok(Cached::new_uncached(None)),
|
||||
Self::Local(_) => Ok(Cached::new_uncached(None)),
|
||||
}
|
||||
}
|
||||
@@ -490,10 +483,8 @@ impl Backend<'_, ComputeUserInfo, &()> {
|
||||
ctx: &RequestMonitoring,
|
||||
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> {
|
||||
match self {
|
||||
Self::ControlPlane(api, user_info) => {
|
||||
api.get_allowed_ips_and_secret(ctx, user_info).await
|
||||
}
|
||||
Self::ConsoleRedirect(_, ()) => Ok((Cached::new_uncached(Arc::new(vec![])), None)),
|
||||
Self::Console(api, user_info) => api.get_allowed_ips_and_secret(ctx, user_info).await,
|
||||
Self::Web(_, ()) => Ok((Cached::new_uncached(Arc::new(vec![])), None)),
|
||||
Self::Local(_) => Ok((Cached::new_uncached(Arc::new(vec![])), None)),
|
||||
}
|
||||
}
|
||||
@@ -504,18 +495,18 @@ impl ComputeConnectBackend for Backend<'_, ComputeCredentials, NodeInfo> {
|
||||
async fn wake_compute(
|
||||
&self,
|
||||
ctx: &RequestMonitoring,
|
||||
) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> {
|
||||
) -> Result<CachedNodeInfo, console::errors::WakeComputeError> {
|
||||
match self {
|
||||
Self::ControlPlane(api, creds) => api.wake_compute(ctx, &creds.info).await,
|
||||
Self::ConsoleRedirect(_, info) => Ok(Cached::new_uncached(info.clone())),
|
||||
Self::Console(api, creds) => api.wake_compute(ctx, &creds.info).await,
|
||||
Self::Web(_, info) => Ok(Cached::new_uncached(info.clone())),
|
||||
Self::Local(local) => Ok(Cached::new_uncached(local.node_info.clone())),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_keys(&self) -> &ComputeCredentialKeys {
|
||||
match self {
|
||||
Self::ControlPlane(_, creds) => &creds.keys,
|
||||
Self::ConsoleRedirect(_, _) => &ComputeCredentialKeys::None,
|
||||
Self::Console(_, creds) => &creds.keys,
|
||||
Self::Web(_, _) => &ComputeCredentialKeys::None,
|
||||
Self::Local(_) => &ComputeCredentialKeys::None,
|
||||
}
|
||||
}
|
||||
@@ -526,10 +517,10 @@ impl ComputeConnectBackend for Backend<'_, ComputeCredentials, &()> {
|
||||
async fn wake_compute(
|
||||
&self,
|
||||
ctx: &RequestMonitoring,
|
||||
) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> {
|
||||
) -> Result<CachedNodeInfo, console::errors::WakeComputeError> {
|
||||
match self {
|
||||
Self::ControlPlane(api, creds) => api.wake_compute(ctx, &creds.info).await,
|
||||
Self::ConsoleRedirect(_, ()) => {
|
||||
Self::Console(api, creds) => api.wake_compute(ctx, &creds.info).await,
|
||||
Self::Web(_, ()) => {
|
||||
unreachable!("web auth flow doesn't support waking the compute")
|
||||
}
|
||||
Self::Local(local) => Ok(Cached::new_uncached(local.node_info.clone())),
|
||||
@@ -538,8 +529,8 @@ impl ComputeConnectBackend for Backend<'_, ComputeCredentials, &()> {
|
||||
|
||||
fn get_keys(&self) -> &ComputeCredentialKeys {
|
||||
match self {
|
||||
Self::ControlPlane(_, creds) => &creds.keys,
|
||||
Self::ConsoleRedirect(_, ()) => &ComputeCredentialKeys::None,
|
||||
Self::Console(_, creds) => &creds.keys,
|
||||
Self::Web(_, ()) => &ComputeCredentialKeys::None,
|
||||
Self::Local(_) => &ComputeCredentialKeys::None,
|
||||
}
|
||||
}
|
||||
@@ -562,12 +553,12 @@ mod tests {
|
||||
use crate::{
|
||||
auth::{backend::MaskedIp, ComputeUserInfoMaybeEndpoint, IpPattern},
|
||||
config::AuthenticationConfig,
|
||||
context::RequestMonitoring,
|
||||
control_plane::{
|
||||
console::{
|
||||
self,
|
||||
provider::{self, CachedAllowedIps, CachedRoleSecret},
|
||||
CachedNodeInfo,
|
||||
},
|
||||
context::RequestMonitoring,
|
||||
proxy::NeonOptions,
|
||||
rate_limiter::{EndpointRateLimiter, RateBucketInfo},
|
||||
scram::{threadpool::ThreadPool, ServerSecret},
|
||||
@@ -581,12 +572,12 @@ mod tests {
|
||||
secret: AuthSecret,
|
||||
}
|
||||
|
||||
impl control_plane::Api for Auth {
|
||||
impl console::Api for Auth {
|
||||
async fn get_role_secret(
|
||||
&self,
|
||||
_ctx: &RequestMonitoring,
|
||||
_user_info: &super::ComputeUserInfo,
|
||||
) -> Result<CachedRoleSecret, control_plane::errors::GetAuthInfoError> {
|
||||
) -> Result<CachedRoleSecret, console::errors::GetAuthInfoError> {
|
||||
Ok(CachedRoleSecret::new_uncached(Some(self.secret.clone())))
|
||||
}
|
||||
|
||||
@@ -594,10 +585,8 @@ mod tests {
|
||||
&self,
|
||||
_ctx: &RequestMonitoring,
|
||||
_user_info: &super::ComputeUserInfo,
|
||||
) -> Result<
|
||||
(CachedAllowedIps, Option<CachedRoleSecret>),
|
||||
control_plane::errors::GetAuthInfoError,
|
||||
> {
|
||||
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), console::errors::GetAuthInfoError>
|
||||
{
|
||||
Ok((
|
||||
CachedAllowedIps::new_uncached(Arc::new(self.ips.clone())),
|
||||
Some(CachedRoleSecret::new_uncached(Some(self.secret.clone()))),
|
||||
@@ -616,7 +605,7 @@ mod tests {
|
||||
&self,
|
||||
_ctx: &RequestMonitoring,
|
||||
_user_info: &super::ComputeUserInfo,
|
||||
) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> {
|
||||
) -> Result<CachedNodeInfo, console::errors::WakeComputeError> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
@@ -3,8 +3,8 @@ use crate::{
|
||||
auth::{self, backend::ComputeCredentialKeys, AuthFlow},
|
||||
compute,
|
||||
config::AuthenticationConfig,
|
||||
console::AuthSecret,
|
||||
context::RequestMonitoring,
|
||||
control_plane::AuthSecret,
|
||||
sasl,
|
||||
stream::{PqStream, Stream},
|
||||
};
|
||||
|
||||
@@ -2,8 +2,8 @@ use super::{ComputeCredentials, ComputeUserInfo, ComputeUserInfoNoEndpoint};
|
||||
use crate::{
|
||||
auth::{self, AuthFlow},
|
||||
config::AuthenticationConfig,
|
||||
console::AuthSecret,
|
||||
context::RequestMonitoring,
|
||||
control_plane::AuthSecret,
|
||||
intern::EndpointIdInt,
|
||||
sasl,
|
||||
stream::{self, Stream},
|
||||
|
||||
@@ -17,8 +17,6 @@ use crate::{
|
||||
RoleName,
|
||||
};
|
||||
|
||||
use super::ComputeCredentialKeys;
|
||||
|
||||
// TODO(conrad): make these configurable.
|
||||
const CLOCK_SKEW_LEEWAY: Duration = Duration::from_secs(30);
|
||||
const MIN_RENEW: Duration = Duration::from_secs(30);
|
||||
@@ -243,7 +241,7 @@ impl JwkCacheEntryLock {
|
||||
endpoint: EndpointId,
|
||||
role_name: &RoleName,
|
||||
fetch: &F,
|
||||
) -> Result<ComputeCredentialKeys, anyhow::Error> {
|
||||
) -> Result<(), anyhow::Error> {
|
||||
// JWT compact form is defined to be
|
||||
// <B64(Header)> || . || <B64(Payload)> || . || <B64(Signature)>
|
||||
// where Signature = alg(<B64(Header)> || . || <B64(Payload)>);
|
||||
@@ -302,9 +300,9 @@ impl JwkCacheEntryLock {
|
||||
key => bail!("unsupported key type {key:?}"),
|
||||
};
|
||||
|
||||
let payloadb = base64::decode_config(payload, base64::URL_SAFE_NO_PAD)
|
||||
let payload = base64::decode_config(payload, base64::URL_SAFE_NO_PAD)
|
||||
.context("Provided authentication token is not a valid JWT encoding")?;
|
||||
let payload = serde_json::from_slice::<JwtPayload<'_>>(&payloadb)
|
||||
let payload = serde_json::from_slice::<JwtPayload<'_>>(&payload)
|
||||
.context("Provided authentication token is not a valid JWT encoding")?;
|
||||
|
||||
tracing::debug!(?payload, "JWT signature valid with claims");
|
||||
@@ -329,7 +327,7 @@ impl JwkCacheEntryLock {
|
||||
);
|
||||
}
|
||||
|
||||
Ok(ComputeCredentialKeys::JwtPayload(payloadb))
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -341,7 +339,7 @@ impl JwkCache {
|
||||
role_name: &RoleName,
|
||||
fetch: &F,
|
||||
jwt: &str,
|
||||
) -> Result<ComputeCredentialKeys, anyhow::Error> {
|
||||
) -> Result<(), anyhow::Error> {
|
||||
// try with just a read lock first
|
||||
let key = (endpoint.clone(), role_name.clone());
|
||||
let entry = self.map.get(&key).as_deref().map(Arc::clone);
|
||||
@@ -573,7 +571,7 @@ mod tests {
|
||||
use bytes::Bytes;
|
||||
use http::Response;
|
||||
use http_body_util::Full;
|
||||
use hyper::service::service_fn;
|
||||
use hyper1::service::service_fn;
|
||||
use hyper_util::rt::TokioIo;
|
||||
use rand::rngs::OsRng;
|
||||
use rsa::pkcs8::DecodePrivateKey;
|
||||
@@ -738,7 +736,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL
|
||||
});
|
||||
|
||||
let listener = TcpListener::bind("0.0.0.0:0").await.unwrap();
|
||||
let server = hyper::server::conn::http1::Builder::new();
|
||||
let server = hyper1::server::conn::http1::Builder::new();
|
||||
let addr = listener.local_addr().unwrap();
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
|
||||
@@ -5,11 +5,11 @@ use arc_swap::ArcSwapOption;
|
||||
|
||||
use crate::{
|
||||
compute::ConnCfg,
|
||||
context::RequestMonitoring,
|
||||
control_plane::{
|
||||
console::{
|
||||
messages::{ColdStartInfo, EndpointJwksResponse, MetricsAuxInfo},
|
||||
NodeInfo,
|
||||
},
|
||||
context::RequestMonitoring,
|
||||
intern::{BranchIdTag, EndpointIdTag, InternId, ProjectIdTag},
|
||||
EndpointId,
|
||||
};
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
use crate::{
|
||||
auth, compute,
|
||||
config::AuthenticationConfig,
|
||||
console::{self, provider::NodeInfo},
|
||||
context::RequestMonitoring,
|
||||
control_plane::{self, provider::NodeInfo},
|
||||
error::{ReportableError, UserFacingError},
|
||||
stream::PqStream,
|
||||
waiters,
|
||||
@@ -70,7 +70,7 @@ pub(super) async fn authenticate(
|
||||
let (psql_session_id, waiter) = loop {
|
||||
let psql_session_id = new_psql_session_id();
|
||||
|
||||
match control_plane::mgmt::get_waiter(&psql_session_id) {
|
||||
match console::mgmt::get_waiter(&psql_session_id) {
|
||||
Ok(waiter) => break (psql_session_id, waiter),
|
||||
Err(_e) => continue,
|
||||
}
|
||||
@@ -3,8 +3,8 @@
|
||||
use super::{backend::ComputeCredentialKeys, AuthErrorImpl, PasswordHackPayload};
|
||||
use crate::{
|
||||
config::TlsServerEndPoint,
|
||||
console::AuthSecret,
|
||||
context::RequestMonitoring,
|
||||
control_plane::AuthSecret,
|
||||
intern::EndpointIdInt,
|
||||
sasl,
|
||||
scram::{self, threadpool::ThreadPool},
|
||||
|
||||
@@ -12,7 +12,7 @@ use proxy::{
|
||||
},
|
||||
cancellation::CancellationHandlerMain,
|
||||
config::{self, AuthenticationConfig, HttpConfig, ProxyConfig, RetryConfig},
|
||||
control_plane::{
|
||||
console::{
|
||||
locks::ApiLocks,
|
||||
messages::{EndpointJwksResponse, JwksSettings},
|
||||
},
|
||||
@@ -77,10 +77,10 @@ struct LocalProxyCliArgs {
|
||||
#[clap(long, default_value = "127.0.0.1:5432")]
|
||||
compute: SocketAddr,
|
||||
/// Path of the local proxy config file
|
||||
#[clap(long, default_value = "./local_proxy.json")]
|
||||
#[clap(long, default_value = "./localproxy.json")]
|
||||
config_path: Utf8PathBuf,
|
||||
/// Path of the local proxy PID file
|
||||
#[clap(long, default_value = "./local_proxy.pid")]
|
||||
#[clap(long, default_value = "./localproxy.pid")]
|
||||
pid_path: Utf8PathBuf,
|
||||
}
|
||||
|
||||
@@ -109,7 +109,7 @@ struct SqlOverHttpArgs {
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let _logging_guard = proxy::logging::init_local_proxy()?;
|
||||
let _logging_guard = proxy::logging::init().await?;
|
||||
let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook();
|
||||
let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);
|
||||
|
||||
@@ -138,7 +138,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
// in order to trigger the appropriate SIGHUP on config change.
|
||||
//
|
||||
// This also claims a "lock" that makes sure only one instance
|
||||
// of local_proxy runs at a time.
|
||||
// of local-proxy runs at a time.
|
||||
let _process_guard = loop {
|
||||
match pid_file::claim_for_current_process(&args.pid_path) {
|
||||
Ok(guard) => break guard,
|
||||
@@ -164,6 +164,12 @@ async fn main() -> anyhow::Result<()> {
|
||||
16,
|
||||
));
|
||||
|
||||
// write the process ID to a file so that compute-ctl can find our process later
|
||||
// in order to trigger the appropriate SIGHUP on config change.
|
||||
let pid = std::process::id();
|
||||
info!("process running in PID {pid}");
|
||||
std::fs::write(args.pid_path, format!("{pid}\n")).context("writing PID to file")?;
|
||||
|
||||
let mut maintenance_tasks = JoinSet::new();
|
||||
|
||||
let refresh_config_notify = Arc::new(Notify::new());
|
||||
@@ -176,9 +182,9 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
// trigger the first config load **after** setting up the signal hook
|
||||
// to avoid the race condition where:
|
||||
// 1. No config file registered when local_proxy starts up
|
||||
// 1. No config file registered when local-proxy starts up
|
||||
// 2. The config file is written but the signal hook is not yet received
|
||||
// 3. local_proxy completes startup but has no config loaded, despite there being a registerd config.
|
||||
// 3. local-proxy completes startup but has no config loaded, despite there being a registerd config.
|
||||
refresh_config_notify.notify_one();
|
||||
tokio::spawn(refresh_config_loop(args.config_path, refresh_config_notify));
|
||||
|
||||
@@ -305,7 +311,7 @@ async fn refresh_config_inner(path: &Utf8Path) -> anyhow::Result<()> {
|
||||
|
||||
let mut jwks_set = vec![];
|
||||
|
||||
for jwks in data.jwks.into_iter().flatten() {
|
||||
for jwks in data.jwks {
|
||||
let mut jwks_url = url::Url::from_str(&jwks.jwks_url).context("parsing JWKS url")?;
|
||||
|
||||
ensure!(
|
||||
|
||||
@@ -19,8 +19,8 @@ use proxy::config::CacheOptions;
|
||||
use proxy::config::HttpConfig;
|
||||
use proxy::config::ProjectInfoCacheOptions;
|
||||
use proxy::config::ProxyProtocolV2;
|
||||
use proxy::console;
|
||||
use proxy::context::parquet::ParquetUploadArgs;
|
||||
use proxy::control_plane;
|
||||
use proxy::http;
|
||||
use proxy::http::health_server::AppMetrics;
|
||||
use proxy::metrics::Metrics;
|
||||
@@ -495,7 +495,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
proxy: proxy::metrics::Metrics::get(),
|
||||
},
|
||||
));
|
||||
maintenance_tasks.spawn(control_plane::mgmt::task_main(mgmt_listener));
|
||||
maintenance_tasks.spawn(console::mgmt::task_main(mgmt_listener));
|
||||
|
||||
if let Some(metrics_config) = &config.metric_collection {
|
||||
// TODO: Add gc regardles of the metric collection being enabled.
|
||||
@@ -506,8 +506,8 @@ async fn main() -> anyhow::Result<()> {
|
||||
));
|
||||
}
|
||||
|
||||
if let auth::Backend::ControlPlane(api, _) = &config.auth_backend {
|
||||
if let proxy::control_plane::provider::ControlPlaneBackend::Management(api) = &**api {
|
||||
if let auth::Backend::Console(api, _) = &config.auth_backend {
|
||||
if let proxy::console::provider::ConsoleBackend::Console(api) = &**api {
|
||||
match (redis_notifications_client, regional_redis_client.clone()) {
|
||||
(None, None) => {}
|
||||
(client1, client2) => {
|
||||
@@ -623,7 +623,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
|
||||
"Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}"
|
||||
);
|
||||
info!("Using EndpointCacheConfig with options={endpoint_cache_config:?}");
|
||||
let caches = Box::leak(Box::new(control_plane::caches::ApiCaches::new(
|
||||
let caches = Box::leak(Box::new(console::caches::ApiCaches::new(
|
||||
wake_compute_cache_config,
|
||||
project_info_cache_config,
|
||||
endpoint_cache_config,
|
||||
@@ -636,7 +636,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
|
||||
timeout,
|
||||
} = args.wake_compute_lock.parse()?;
|
||||
info!(?limiter, shards, ?epoch, "Using NodeLocks (wake_compute)");
|
||||
let locks = Box::leak(Box::new(control_plane::locks::ApiLocks::new(
|
||||
let locks = Box::leak(Box::new(console::locks::ApiLocks::new(
|
||||
"wake_compute_lock",
|
||||
limiter,
|
||||
shards,
|
||||
@@ -653,27 +653,27 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
|
||||
RateBucketInfo::validate(&mut wake_compute_rps_limit)?;
|
||||
let wake_compute_endpoint_rate_limiter =
|
||||
Arc::new(WakeComputeRateLimiter::new(wake_compute_rps_limit));
|
||||
let api = control_plane::provider::neon::Api::new(
|
||||
let api = console::provider::neon::Api::new(
|
||||
endpoint,
|
||||
caches,
|
||||
locks,
|
||||
wake_compute_endpoint_rate_limiter,
|
||||
);
|
||||
let api = control_plane::provider::ControlPlaneBackend::Management(api);
|
||||
auth::Backend::ControlPlane(MaybeOwned::Owned(api), ())
|
||||
let api = console::provider::ConsoleBackend::Console(api);
|
||||
auth::Backend::Console(MaybeOwned::Owned(api), ())
|
||||
}
|
||||
|
||||
AuthBackendType::Web => {
|
||||
let url = args.uri.parse()?;
|
||||
auth::Backend::ConsoleRedirect(MaybeOwned::Owned(url), ())
|
||||
auth::Backend::Web(MaybeOwned::Owned(url), ())
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
AuthBackendType::Postgres => {
|
||||
let url = args.auth_endpoint.parse()?;
|
||||
let api = control_plane::provider::mock::Api::new(url, !args.is_private_access_proxy);
|
||||
let api = control_plane::provider::ControlPlaneBackend::PostgresMock(api);
|
||||
auth::Backend::ControlPlane(MaybeOwned::Owned(api), ())
|
||||
let api = console::provider::mock::Api::new(url, !args.is_private_access_proxy);
|
||||
let api = console::provider::ConsoleBackend::Postgres(api);
|
||||
auth::Backend::Console(MaybeOwned::Owned(api), ())
|
||||
}
|
||||
};
|
||||
|
||||
@@ -689,7 +689,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
|
||||
?epoch,
|
||||
"Using NodeLocks (connect_compute)"
|
||||
);
|
||||
let connect_compute_locks = control_plane::locks::ApiLocks::new(
|
||||
let connect_compute_locks = console::locks::ApiLocks::new(
|
||||
"connect_compute_lock",
|
||||
limiter,
|
||||
shards,
|
||||
|
||||
2 proxy/src/cache/project_info.rs vendored
@@ -16,7 +16,7 @@ use tracing::{debug, info};
|
||||
use crate::{
|
||||
auth::IpPattern,
|
||||
config::ProjectInfoCacheOptions,
|
||||
control_plane::AuthSecret,
|
||||
console::AuthSecret,
|
||||
intern::{EndpointIdInt, ProjectIdInt, RoleNameInt},
|
||||
EndpointId, RoleName,
|
||||
};
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
use crate::{
|
||||
auth::parse_endpoint_param,
|
||||
cancellation::CancelClosure,
|
||||
console::{errors::WakeComputeError, messages::MetricsAuxInfo, provider::ApiLockError},
|
||||
context::RequestMonitoring,
|
||||
control_plane::{errors::WakeComputeError, messages::MetricsAuxInfo, provider::ApiLockError},
|
||||
error::{ReportableError, UserFacingError},
|
||||
metrics::{Metrics, NumDbConnectionsGuard},
|
||||
proxy::neon_option,
|
||||
@@ -20,7 +20,7 @@ use tokio_postgres::tls::MakeTlsConnect;
|
||||
use tokio_postgres_rustls::MakeRustlsConnect;
|
||||
use tracing::{error, info, warn};
|
||||
|
||||
pub const COULD_NOT_CONNECT: &str = "Couldn't connect to compute node";
|
||||
const COULD_NOT_CONNECT: &str = "Couldn't connect to compute node";
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub(crate) enum ConnectionError {
|
||||
|
||||
@@ -3,7 +3,7 @@ use crate::{
|
||||
self,
|
||||
backend::{jwt::JwkCache, AuthRateLimiter},
|
||||
},
|
||||
control_plane::locks::ApiLocks,
|
||||
console::locks::ApiLocks,
|
||||
rate_limiter::{RateBucketInfo, RateLimitAlgorithm, RateLimiterConfig},
|
||||
scram::threadpool::ThreadPool,
|
||||
serverless::{cancel_set::CancelSet, GlobalConnPoolOptions},
|
||||
@@ -372,7 +372,7 @@ pub struct EndpointCacheConfig {
|
||||
}
|
||||
|
||||
impl EndpointCacheConfig {
|
||||
/// Default options for [`crate::control_plane::provider::NodeInfoCache`].
|
||||
/// Default options for [`crate::console::provider::NodeInfoCache`].
|
||||
/// Notice that by default the limiter is empty, which means that cache is disabled.
|
||||
pub const CACHE_DEFAULT_OPTIONS: &'static str =
|
||||
"initial_batch_size=1000,default_batch_size=10,xread_timeout=5m,stream_name=controlPlane,disable_cache=true,limiter_info=1000@1s,retry_interval=1s";
|
||||
@@ -447,7 +447,7 @@ pub struct CacheOptions {
|
||||
}
|
||||
|
||||
impl CacheOptions {
|
||||
/// Default options for [`crate::control_plane::provider::NodeInfoCache`].
|
||||
/// Default options for [`crate::console::provider::NodeInfoCache`].
|
||||
pub const CACHE_DEFAULT_OPTIONS: &'static str = "size=4000,ttl=4m";
|
||||
|
||||
/// Parse cache options passed via cmdline.
|
||||
@@ -503,7 +503,7 @@ pub struct ProjectInfoCacheOptions {
|
||||
}
|
||||
|
||||
impl ProjectInfoCacheOptions {
|
||||
/// Default options for [`crate::control_plane::provider::NodeInfoCache`].
|
||||
/// Default options for [`crate::console::provider::NodeInfoCache`].
|
||||
pub const CACHE_DEFAULT_OPTIONS: &'static str =
|
||||
"size=10000,ttl=4m,max_roles=10,gc_interval=60m";
|
||||
|
||||
@@ -622,9 +622,9 @@ pub struct ConcurrencyLockOptions {
|
||||
}
|
||||
|
||||
impl ConcurrencyLockOptions {
|
||||
/// Default options for [`crate::control_plane::provider::ApiLocks`].
|
||||
/// Default options for [`crate::console::provider::ApiLocks`].
|
||||
pub const DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK: &'static str = "permits=0";
|
||||
/// Default options for [`crate::control_plane::provider::ApiLocks`].
|
||||
/// Default options for [`crate::console::provider::ApiLocks`].
|
||||
pub const DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK: &'static str =
|
||||
"shards=64,permits=100,epoch=10m,timeout=10ms";
|
||||
|
||||
|
||||