diff --git a/.github/ansible/staging.eu-west-1.hosts.yaml b/.github/ansible/staging.eu-west-1.hosts.yaml
index 39f5613935..a54ced7f3a 100644
--- a/.github/ansible/staging.eu-west-1.hosts.yaml
+++ b/.github/ansible/staging.eu-west-1.hosts.yaml
@@ -35,6 +35,8 @@ storage:
hosts:
pageserver-0.eu-west-1.aws.neon.build:
ansible_host: i-01d496c5041c7f34c
+ pageserver-1.eu-west-1.aws.neon.build:
+ ansible_host: i-0e8013e239ce3928c
safekeepers:
hosts:
@@ -44,3 +46,15 @@ storage:
ansible_host: i-06969ee1bf2958bfc
safekeeper-2.eu-west-1.aws.neon.build:
ansible_host: i-087892e9625984a0b
+ safekeeper-3.eu-west-1.aws.neon.build:
+ ansible_host: i-0a6f91660e99e8891
+ safekeeper-4.eu-west-1.aws.neon.build:
+ ansible_host: i-0012e309e28e7c249
+ safekeeper-5.eu-west-1.aws.neon.build:
+ ansible_host: i-085a2b1193287b32e
+ safekeeper-6.eu-west-1.aws.neon.build:
+ ansible_host: i-0c713248465ed0fbd
+ safekeeper-7.eu-west-1.aws.neon.build:
+ ansible_host: i-02ad231aed2a80b7a
+ safekeeper-8.eu-west-1.aws.neon.build:
+ ansible_host: i-0dbbd8ffef66efda8
diff --git a/.github/helm-values/dev-eu-central-1-alpha.pg-sni-router.yaml b/.github/helm-values/dev-eu-central-1-alpha.pg-sni-router.yaml
new file mode 100644
index 0000000000..a80423b12d
--- /dev/null
+++ b/.github/helm-values/dev-eu-central-1-alpha.pg-sni-router.yaml
@@ -0,0 +1,19 @@
+useCertManager: true
+
+replicaCount: 3
+
+exposedService:
+ # exposedService.port -- Exposed Service proxy port
+ port: 4432
+ annotations:
+ external-dns.alpha.kubernetes.io/hostname: "*.snirouter.alpha.eu-central-1.internal.aws.neon.build"
+
+settings:
+ domain: "*.snirouter.alpha.eu-central-1.internal.aws.neon.build"
+ sentryEnvironment: "staging"
+
+imagePullSecrets:
+ - name: docker-hub-neon
+
+metrics:
+ enabled: false
diff --git a/.github/helm-values/dev-eu-west-1-zeta.pg-sni-router.yaml b/.github/helm-values/dev-eu-west-1-zeta.pg-sni-router.yaml
new file mode 100644
index 0000000000..c9c628af0c
--- /dev/null
+++ b/.github/helm-values/dev-eu-west-1-zeta.pg-sni-router.yaml
@@ -0,0 +1,19 @@
+useCertManager: true
+
+replicaCount: 3
+
+exposedService:
+ # exposedService.port -- Exposed Service proxy port
+ port: 4432
+ annotations:
+ external-dns.alpha.kubernetes.io/hostname: "*.snirouter.zeta.eu-west-1.internal.aws.neon.build"
+
+settings:
+ domain: "*.snirouter.zeta.eu-west-1.internal.aws.neon.build"
+ sentryEnvironment: "staging"
+
+imagePullSecrets:
+ - name: docker-hub-neon
+
+metrics:
+ enabled: false
diff --git a/.github/helm-values/dev-us-east-2-beta.pg-sni-router.yaml b/.github/helm-values/dev-us-east-2-beta.pg-sni-router.yaml
new file mode 100644
index 0000000000..68ad096df7
--- /dev/null
+++ b/.github/helm-values/dev-us-east-2-beta.pg-sni-router.yaml
@@ -0,0 +1,19 @@
+useCertManager: true
+
+replicaCount: 3
+
+exposedService:
+ # exposedService.port -- Exposed Service proxy port
+ port: 4432
+ annotations:
+ external-dns.alpha.kubernetes.io/hostname: "*.snirouter.beta.us-east-2.internal.aws.neon.build"
+
+settings:
+ domain: "*.snirouter.beta.us-east-2.internal.aws.neon.build"
+ sentryEnvironment: "staging"
+
+imagePullSecrets:
+ - name: docker-hub-neon
+
+metrics:
+ enabled: false
diff --git a/.github/helm-values/prod-ap-southeast-1-epsilon.pg-sni-router.yaml b/.github/helm-values/prod-ap-southeast-1-epsilon.pg-sni-router.yaml
new file mode 100644
index 0000000000..478ad5631c
--- /dev/null
+++ b/.github/helm-values/prod-ap-southeast-1-epsilon.pg-sni-router.yaml
@@ -0,0 +1,19 @@
+useCertManager: true
+
+replicaCount: 3
+
+exposedService:
+ # exposedService.port -- Exposed Service proxy port
+ port: 4432
+ annotations:
+ external-dns.alpha.kubernetes.io/hostname: "*.snirouter.epsilon.ap-southeast-1.internal.aws.neon.tech"
+
+settings:
+ domain: "*.snirouter.epsilon.ap-southeast-1.internal.aws.neon.tech"
+ sentryEnvironment: "production"
+
+imagePullSecrets:
+ - name: docker-hub-neon
+
+metrics:
+ enabled: false
diff --git a/.github/helm-values/prod-eu-central-1-gamma.pg-sni-router.yaml b/.github/helm-values/prod-eu-central-1-gamma.pg-sni-router.yaml
new file mode 100644
index 0000000000..08a0a163bc
--- /dev/null
+++ b/.github/helm-values/prod-eu-central-1-gamma.pg-sni-router.yaml
@@ -0,0 +1,19 @@
+useCertManager: true
+
+replicaCount: 3
+
+exposedService:
+ # exposedService.port -- Exposed Service proxy port
+ port: 4432
+ annotations:
+ external-dns.alpha.kubernetes.io/hostname: "*.snirouter.gamma.eu-central-1.internal.aws.neon.tech"
+
+settings:
+ domain: "*.snirouter.gamma.eu-central-1.internal.aws.neon.tech"
+ sentryEnvironment: "production"
+
+imagePullSecrets:
+ - name: docker-hub-neon
+
+metrics:
+ enabled: false
diff --git a/.github/helm-values/prod-us-east-1-theta.pg-sni-router.yaml b/.github/helm-values/prod-us-east-1-theta.pg-sni-router.yaml
new file mode 100644
index 0000000000..ab308131bc
--- /dev/null
+++ b/.github/helm-values/prod-us-east-1-theta.pg-sni-router.yaml
@@ -0,0 +1,19 @@
+useCertManager: true
+
+replicaCount: 3
+
+exposedService:
+ # exposedService.port -- Exposed Service proxy port
+ port: 4432
+ annotations:
+ external-dns.alpha.kubernetes.io/hostname: "*.snirouter.theta.us-east-1.internal.aws.neon.tech"
+
+settings:
+ domain: "*.snirouter.theta.us-east-1.internal.aws.neon.tech"
+ sentryEnvironment: "production"
+
+imagePullSecrets:
+ - name: docker-hub-neon
+
+metrics:
+ enabled: false
diff --git a/.github/helm-values/prod-us-east-2-delta.pg-sni-router.yaml b/.github/helm-values/prod-us-east-2-delta.pg-sni-router.yaml
new file mode 100644
index 0000000000..ecb3f156ec
--- /dev/null
+++ b/.github/helm-values/prod-us-east-2-delta.pg-sni-router.yaml
@@ -0,0 +1,19 @@
+useCertManager: true
+
+replicaCount: 3
+
+exposedService:
+ # exposedService.port -- Exposed Service proxy port
+ port: 4432
+ annotations:
+ external-dns.alpha.kubernetes.io/hostname: "*.snirouter.delta.us-east-2.internal.aws.neon.tech"
+
+settings:
+ domain: "*.snirouter.delta.us-east-2.internal.aws.neon.tech"
+ sentryEnvironment: "production"
+
+imagePullSecrets:
+ - name: docker-hub-neon
+
+metrics:
+ enabled: false
diff --git a/.github/helm-values/prod-us-west-2-eta.pg-sni-router.yaml b/.github/helm-values/prod-us-west-2-eta.pg-sni-router.yaml
new file mode 100644
index 0000000000..942250c419
--- /dev/null
+++ b/.github/helm-values/prod-us-west-2-eta.pg-sni-router.yaml
@@ -0,0 +1,19 @@
+useCertManager: true
+
+replicaCount: 3
+
+exposedService:
+ # exposedService.port -- Exposed Service proxy port
+ port: 4432
+ annotations:
+ external-dns.alpha.kubernetes.io/hostname: "*.snirouter.eta.us-west-2.internal.aws.neon.tech"
+
+settings:
+ domain: "*.snirouter.eta.us-west-2.internal.aws.neon.tech"
+ sentryEnvironment: "production"
+
+imagePullSecrets:
+ - name: docker-hub-neon
+
+metrics:
+ enabled: false
diff --git a/.github/workflows/deploy-dev.yml b/.github/workflows/deploy-dev.yml
index 5d1c6e0e16..f37e1b344d 100644
--- a/.github/workflows/deploy-dev.yml
+++ b/.github/workflows/deploy-dev.yml
@@ -27,6 +27,11 @@ on:
required: true
type: boolean
default: true
+ deployPgSniRouter:
+ description: 'Deploy pg-sni-router'
+ required: true
+ type: boolean
+ default: true
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
@@ -227,3 +232,49 @@ jobs:
- name: Cleanup helm folder
run: rm -rf ~/.cache
+
+ deploy-pg-sni-router:
+ runs-on: [ self-hosted, gen3, small ]
+ container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
+ if: inputs.deployPgSniRouter
+ defaults:
+ run:
+ shell: bash
+ strategy:
+ matrix:
+ include:
+ - target_region: us-east-2
+ target_cluster: dev-us-east-2-beta
+ - target_region: eu-west-1
+ target_cluster: dev-eu-west-1-zeta
+ - target_region: eu-central-1
+ target_cluster: dev-eu-central-1-alpha
+ environment:
+ name: dev-${{ matrix.target_region }}
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v3
+ with:
+ submodules: true
+ fetch-depth: 0
+ ref: ${{ inputs.branch }}
+
+ - name: Configure AWS Credentials
+ uses: aws-actions/configure-aws-credentials@v1-node16
+ with:
+ role-to-assume: arn:aws:iam::369495373322:role/github-runner
+ aws-region: eu-central-1
+ role-skip-session-tagging: true
+ role-duration-seconds: 1800
+
+ - name: Configure environment
+ run: |
+ helm repo add neondatabase https://neondatabase.github.io/helm-charts
+ aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
+
+ - name: Deploy pg-sni-router
+ run:
+ helm upgrade neon-pg-sni-router neondatabase/neon-pg-sni-router --namespace neon-pg-sni-router --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.pg-sni-router.yaml --set image.tag=${{ inputs.dockerTag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 15m0s
+
+ - name: Cleanup helm folder
+ run: rm -rf ~/.cache
diff --git a/.github/workflows/deploy-prod.yml b/.github/workflows/deploy-prod.yml
index 9fa31b3225..c5d690db3a 100644
--- a/.github/workflows/deploy-prod.yml
+++ b/.github/workflows/deploy-prod.yml
@@ -27,6 +27,11 @@ on:
required: true
type: boolean
default: true
+ deployPgSniRouter:
+ description: 'Deploy pg-sni-router'
+ required: true
+ type: boolean
+ default: true
disclamerAcknowledged:
description: 'I confirm that there is an emergency and I can not use regular release workflow'
required: true
@@ -171,3 +176,42 @@ jobs:
- name: Deploy storage-broker
run:
helm upgrade neon-storage-broker-lb neondatabase/neon-storage-broker --namespace neon-storage-broker-lb --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${{ inputs.dockerTag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 5m0s
+
+ deploy-pg-sni-router:
+ runs-on: prod
+ container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
+ if: inputs.deployPgSniRouter && inputs.disclamerAcknowledged
+ defaults:
+ run:
+ shell: bash
+ strategy:
+ matrix:
+ include:
+ - target_region: us-east-2
+ target_cluster: prod-us-east-2-delta
+ - target_region: us-west-2
+ target_cluster: prod-us-west-2-eta
+ - target_region: eu-central-1
+ target_cluster: prod-eu-central-1-gamma
+ - target_region: ap-southeast-1
+ target_cluster: prod-ap-southeast-1-epsilon
+ - target_region: us-east-1
+ target_cluster: prod-us-east-1-theta
+ environment:
+ name: prod-${{ matrix.target_region }}
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v3
+ with:
+ submodules: true
+ fetch-depth: 0
+ ref: ${{ inputs.branch }}
+
+ - name: Configure environment
+ run: |
+ helm repo add neondatabase https://neondatabase.github.io/helm-charts
+ aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
+
+ - name: Deploy pg-sni-router
+ run:
+ helm upgrade neon-pg-sni-router neondatabase/neon-pg-sni-router --namespace neon-pg-sni-router --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.pg-sni-router.yaml --set image.tag=${{ inputs.dockerTag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 15m0s
diff --git a/Cargo.lock b/Cargo.lock
index 5f3a83ce2d..bce2d11188 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1574,6 +1574,21 @@ version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
+[[package]]
+name = "foreign-types"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
+dependencies = [
+ "foreign-types-shared",
+]
+
+[[package]]
+name = "foreign-types-shared"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
+
[[package]]
name = "form_urlencoded"
version = "1.1.0"
@@ -2361,6 +2376,24 @@ version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"
+[[package]]
+name = "native-tls"
+version = "0.2.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e"
+dependencies = [
+ "lazy_static",
+ "libc",
+ "log",
+ "openssl",
+ "openssl-probe",
+ "openssl-sys",
+ "schannel",
+ "security-framework",
+ "security-framework-sys",
+ "tempfile",
+]
+
[[package]]
name = "nix"
version = "0.26.2"
@@ -2483,12 +2516,50 @@ version = "11.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
+[[package]]
+name = "openssl"
+version = "0.10.52"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "01b8574602df80f7b85fdfc5392fa884a4e3b3f4f35402c070ab34c3d3f78d56"
+dependencies = [
+ "bitflags",
+ "cfg-if",
+ "foreign-types",
+ "libc",
+ "once_cell",
+ "openssl-macros",
+ "openssl-sys",
+]
+
+[[package]]
+name = "openssl-macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.15",
+]
+
[[package]]
name = "openssl-probe"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
+[[package]]
+name = "openssl-sys"
+version = "0.9.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e17f59264b2809d77ae94f0e1ebabc434773f370d6ca667bd223ea10e06cc7e"
+dependencies = [
+ "cc",
+ "libc",
+ "pkg-config",
+ "vcpkg",
+]
+
[[package]]
name = "opentelemetry"
version = "0.18.0"
@@ -2816,6 +2887,12 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
+[[package]]
+name = "pkg-config"
+version = "0.3.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
+
[[package]]
name = "plotters"
version = "0.3.4"
@@ -2847,7 +2924,7 @@ dependencies = [
[[package]]
name = "postgres"
version = "0.19.4"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=43e6db254a97fdecbce33d8bc0890accfd74495e#43e6db254a97fdecbce33d8bc0890accfd74495e"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=0bc41d8503c092b040142214aac3cf7d11d0c19f#0bc41d8503c092b040142214aac3cf7d11d0c19f"
dependencies = [
"bytes",
"fallible-iterator",
@@ -2857,10 +2934,21 @@ dependencies = [
"tokio-postgres",
]
+[[package]]
+name = "postgres-native-tls"
+version = "0.5.0"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=0bc41d8503c092b040142214aac3cf7d11d0c19f#0bc41d8503c092b040142214aac3cf7d11d0c19f"
+dependencies = [
+ "native-tls",
+ "tokio",
+ "tokio-native-tls",
+ "tokio-postgres",
+]
+
[[package]]
name = "postgres-protocol"
version = "0.6.4"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=43e6db254a97fdecbce33d8bc0890accfd74495e#43e6db254a97fdecbce33d8bc0890accfd74495e"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=0bc41d8503c092b040142214aac3cf7d11d0c19f#0bc41d8503c092b040142214aac3cf7d11d0c19f"
dependencies = [
"base64 0.20.0",
"byteorder",
@@ -2878,7 +2966,7 @@ dependencies = [
[[package]]
name = "postgres-types"
version = "0.2.4"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=43e6db254a97fdecbce33d8bc0890accfd74495e#43e6db254a97fdecbce33d8bc0890accfd74495e"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=0bc41d8503c092b040142214aac3cf7d11d0c19f#0bc41d8503c092b040142214aac3cf7d11d0c19f"
dependencies = [
"bytes",
"fallible-iterator",
@@ -3109,10 +3197,12 @@ dependencies = [
"itertools",
"md5",
"metrics",
+ "native-tls",
"once_cell",
"opentelemetry",
"parking_lot",
"pin-project-lite",
+ "postgres-native-tls",
"postgres_backend",
"pq_proto",
"prometheus",
@@ -3567,6 +3657,7 @@ dependencies = [
"const_format",
"crc32c",
"fs2",
+ "futures",
"git-version",
"hex",
"humantime",
@@ -3581,6 +3672,7 @@ dependencies = [
"pq_proto",
"regex",
"remote_storage",
+ "reqwest",
"safekeeper_api",
"serde",
"serde_json",
@@ -3868,8 +3960,7 @@ dependencies = [
[[package]]
name = "sharded-slab"
version = "0.1.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31"
+source = "git+https://github.com/neondatabase/sharded-slab.git?rev=98d16753ab01c61f0a028de44167307a00efea00#98d16753ab01c61f0a028de44167307a00efea00"
dependencies = [
"lazy_static",
]
@@ -4319,10 +4410,20 @@ dependencies = [
"syn 2.0.15",
]
+[[package]]
+name = "tokio-native-tls"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2"
+dependencies = [
+ "native-tls",
+ "tokio",
+]
+
[[package]]
name = "tokio-postgres"
version = "0.7.7"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=43e6db254a97fdecbce33d8bc0890accfd74495e#43e6db254a97fdecbce33d8bc0890accfd74495e"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=0bc41d8503c092b040142214aac3cf7d11d0c19f#0bc41d8503c092b040142214aac3cf7d11d0c19f"
dependencies = [
"async-trait",
"byteorder",
@@ -4914,6 +5015,12 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
+[[package]]
+name = "vcpkg"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
+
[[package]]
name = "version_check"
version = "0.9.4"
diff --git a/Cargo.toml b/Cargo.toml
index f4872433cd..b73e29ef6c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -62,6 +62,7 @@ jsonwebtoken = "8"
libc = "0.2"
md5 = "0.7.0"
memoffset = "0.8"
+native-tls = "0.2"
nix = "0.26"
notify = "5.0.0"
num_cpus = "1.15"
@@ -124,10 +125,11 @@ env_logger = "0.10"
log = "0.4"
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
-postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
-postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
-tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
+postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
+postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
+postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
+postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
+tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
tokio-tar = { git = "https://github.com/neondatabase/tokio-tar.git", rev="404df61437de0feef49ba2ccdbdd94eb8ad6e142" }
## Other git libraries
@@ -159,10 +161,16 @@ rstest = "0.17"
tempfile = "3.4"
tonic-build = "0.9"
+[patch.crates-io]
+
# This is only needed for proxy's tests.
# TODO: we should probably fork `tokio-postgres-rustls` instead.
-[patch.crates-io]
-tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
+tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
+
+# Changes the MAX_THREADS limit from 4096 to 32768.
+# This is a temporary workaround for using tracing from many threads in the safekeeper code,
+# until the async safekeepers patch is merged into main.
+sharded-slab = { git = "https://github.com/neondatabase/sharded-slab.git", rev="98d16753ab01c61f0a028de44167307a00efea00" }
################# Binary contents sections
diff --git a/Dockerfile b/Dockerfile
index 6f7d2c32a5..f83f3b1c21 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -44,7 +44,15 @@ COPY --chown=nonroot . .
# Show build caching stats to check if it was used in the end.
# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
RUN set -e \
-&& mold -run cargo build --bin pageserver --bin pageserver_binutils --bin draw_timeline_dir --bin safekeeper --bin storage_broker --bin proxy --locked --release \
+ && mold -run cargo build \
+ --bin pg_sni_router \
+ --bin pageserver \
+ --bin pageserver_binutils \
+ --bin draw_timeline_dir \
+ --bin safekeeper \
+ --bin storage_broker \
+ --bin proxy \
+ --locked --release \
&& cachepot -s
# Build final image
@@ -63,6 +71,7 @@ RUN set -e \
&& useradd -d /data neon \
&& chown -R neon:neon /data
+COPY --from=build --chown=neon:neon /home/nonroot/target/release/pg_sni_router /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver_binutils /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/draw_timeline_dir /usr/local/bin
diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs
index deb20f21f8..b5d7eb0132 100644
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -287,14 +287,33 @@ impl EvictionsWithLowResidenceDuration {
let Some(_counter) = self.counter.take() else {
return;
};
- EVICTIONS_WITH_LOW_RESIDENCE_DURATION
- .remove_label_values(&[
- tenant_id,
- timeline_id,
- self.data_source,
- &Self::threshold_label_value(self.threshold),
- ])
- .expect("we own the metric, no-one else should remove it");
+
+ let threshold = Self::threshold_label_value(self.threshold);
+
+ let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[
+ tenant_id,
+ timeline_id,
+ self.data_source,
+ &threshold,
+ ]);
+
+ match removed {
+ Err(e) => {
+ // this has been hit in staging (the reference that followed "as" is
+ // missing from this text), but we don't know how.
+ // because we can be in the drop path already, don't risk:
+ // - "double-panic => illegal instruction" or
+ // - future "drop panic => abort"
+ //
+ // so just nag: (the error has the labels)
+ tracing::warn!("failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}");
+ }
+ Ok(()) => {
+ // to help identify cases where we double-remove the same values, let's log all
+ // deletions?
+ tracing::info!("removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}", self.data_source);
+ }
+ }
}
}
diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs
index 8b0795db3c..a7a0d1a22e 100644
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -352,7 +352,7 @@ impl PageServerHandler {
tenant_id: TenantId,
timeline_id: TimelineId,
ctx: RequestContext,
- ) -> anyhow::Result<()>
+ ) -> Result<(), QueryError>
where
IO: AsyncRead + AsyncWrite + Send + Sync + Unpin,
{
@@ -398,7 +398,9 @@ impl PageServerHandler {
Some(FeMessage::CopyData(bytes)) => bytes,
Some(FeMessage::Terminate) => break,
Some(m) => {
- anyhow::bail!("unexpected message: {m:?} during COPY");
+ return Err(QueryError::Other(anyhow::anyhow!(
+ "unexpected message: {m:?} during COPY"
+ )));
}
None => break, // client disconnected
};
diff --git a/proxy/Cargo.toml b/proxy/Cargo.toml
index 9d702b29c3..e7a4fd236e 100644
--- a/proxy/Cargo.toml
+++ b/proxy/Cargo.toml
@@ -62,6 +62,8 @@ utils.workspace = true
uuid.workspace = true
webpki-roots.workspace = true
x509-parser.workspace = true
+native-tls.workspace = true
+postgres-native-tls.workspace = true
workspace_hack.workspace = true
tokio-util.workspace = true
diff --git a/proxy/src/auth/backend/link.rs b/proxy/src/auth/backend/link.rs
index 7175a23dc1..da43cf11c4 100644
--- a/proxy/src/auth/backend/link.rs
+++ b/proxy/src/auth/backend/link.rs
@@ -9,6 +9,7 @@ use crate::{
use pq_proto::BeMessage as Be;
use thiserror::Error;
use tokio::io::{AsyncRead, AsyncWrite};
+use tokio_postgres::config::SslMode;
use tracing::{info, info_span};
#[derive(Debug, Error)]
@@ -87,6 +88,16 @@ pub(super) async fn authenticate(
.dbname(&db_info.dbname)
.user(&db_info.user);
+ // Backwards compatibility. pg_sni_router uses "--" in domain names
+ // while direct connections do not. Once we migrate to pg_sni_router
+ // everywhere, we can remove this.
+ if db_info.host.contains("--") {
+ // we need TLS connection with SNI info to properly route it
+ config.ssl_mode(SslMode::Require);
+ } else {
+ config.ssl_mode(SslMode::Disable);
+ }
+
if let Some(password) = db_info.password {
config.password(password.as_ref());
}
@@ -96,6 +107,7 @@ pub(super) async fn authenticate(
value: NodeInfo {
config,
aux: db_info.aux.into(),
+ allow_self_signed_compute: false, // caller may override
},
})
}
diff --git a/proxy/src/bin/pg_sni_router.rs b/proxy/src/bin/pg_sni_router.rs
new file mode 100644
index 0000000000..bba2d51caf
--- /dev/null
+++ b/proxy/src/bin/pg_sni_router.rs
@@ -0,0 +1,250 @@
+//! A stand-alone program that routes connections, e.g. from
+//! `aaa--bbb--1234.external.domain` to `aaa.bbb.internal.domain:1234`.
+//!
+//! This allows connecting to pods/services running in the same Kubernetes cluster from
+//! the outside. Similar to an ingress controller for HTTPS.
+use std::{net::SocketAddr, sync::Arc};
+
+use tokio::net::TcpListener;
+
+use anyhow::{anyhow, bail, ensure, Context};
+use clap::{self, Arg};
+use futures::TryFutureExt;
+use proxy::console::messages::MetricsAuxInfo;
+use proxy::stream::{PqStream, Stream};
+
+use tokio::io::{AsyncRead, AsyncWrite};
+use tokio_util::sync::CancellationToken;
+use utils::{project_git_version, sentry_init::init_sentry};
+
+use tracing::{error, info, warn};
+
+project_git_version!(GIT_VERSION);
+
+fn cli() -> clap::Command {
+ clap::Command::new("Neon proxy/router")
+ .version(GIT_VERSION)
+ .arg(
+ Arg::new("listen")
+ .short('l')
+ .long("listen")
+ .help("listen for incoming client connections on ip:port")
+ .default_value("127.0.0.1:4432"), // loopback-only unless overridden with -l/--listen
+ )
+ .arg(
+ Arg::new("tls-key")
+ .short('k')
+ .long("tls-key")
+ .help("path to TLS key for client postgres connections")
+ .required(true),
+ )
+ .arg(
+ Arg::new("tls-cert")
+ .short('c')
+ .long("tls-cert")
+ .help("path to TLS cert for client postgres connections")
+ .required(true),
+ )
+ .arg(
+ Arg::new("dest")
+ .short('d')
+ .long("destination")
+ .help("append this domain zone to the SNI hostname to get the destination address")
+ .required(true),
+ )
+}
+
+#[tokio::main]
+async fn main() -> anyhow::Result<()> {
+ let _logging_guard = proxy::logging::init().await?;
+ let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook();
+ let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);
+
+ let args = cli().get_matches();
+ let destination: String = args.get_one::("dest").unwrap().parse()?;
+
+ // Configure TLS. Both options are `required(true)` in `cli()`, so the `_` arm is presumably unreachable.
+ let tls_config: Arc = match (
+ args.get_one::("tls-key"),
+ args.get_one::("tls-cert"),
+ ) {
+ (Some(key_path), Some(cert_path)) => {
+ let key = {
+ let key_bytes = std::fs::read(key_path).context("TLS key file")?;
+ let mut keys = rustls_pemfile::pkcs8_private_keys(&mut &key_bytes[..])
+ .context(format!("Failed to read TLS keys at '{key_path}'"))?;
+
+ ensure!(keys.len() == 1, "keys.len() = {} (should be 1)", keys.len());
+ keys.pop().map(rustls::PrivateKey).unwrap()
+ };
+
+ let cert_chain_bytes = std::fs::read(cert_path)
+ .context(format!("Failed to read TLS cert file at '{cert_path}.'"))?;
+
+ let cert_chain = {
+ rustls_pemfile::certs(&mut &cert_chain_bytes[..])
+ .context(format!(
+ "Failed to read TLS certificate chain from bytes from file at '{cert_path}'."
+ ))?
+ .into_iter()
+ .map(rustls::Certificate)
+ .collect()
+ };
+
+ rustls::ServerConfig::builder()
+ .with_safe_default_cipher_suites()
+ .with_safe_default_kx_groups()
+ .with_protocol_versions(&[&rustls::version::TLS13, &rustls::version::TLS12])?
+ .with_no_client_auth()
+ .with_single_cert(cert_chain, key)?
+ .into()
+ }
+ _ => bail!("tls-key and tls-cert must be specified"),
+ };
+
+ // Bind the listening socket for incoming client connections
+ let proxy_address: SocketAddr = args.get_one::("listen").unwrap().parse()?;
+ info!("Starting sni router on {proxy_address}");
+ let proxy_listener = TcpListener::bind(proxy_address).await?;
+
+ let cancellation_token = CancellationToken::new();
+
+ let main = proxy::flatten_err(tokio::spawn(task_main(
+ Arc::new(destination),
+ tls_config,
+ proxy_listener,
+ cancellation_token.clone(),
+ )));
+ let signals_task = proxy::flatten_err(tokio::spawn(proxy::handle_signals(cancellation_token)));
+
+ tokio::select! {
+ res = main => { res?; },
+ res = signals_task => { res?; },
+ }
+
+ Ok(())
+}
+
+async fn task_main(
+ dest_suffix: Arc,
+ tls_config: Arc,
+ listener: tokio::net::TcpListener,
+ cancellation_token: CancellationToken,
+) -> anyhow::Result<()> {
+ // When set for the server socket, the keepalive setting
+ // will be inherited by all accepted client sockets.
+ socket2::SockRef::from(&listener).set_keepalive(true)?;
+
+ let mut connections = tokio::task::JoinSet::new();
+
+ loop {
+ tokio::select! {
+ accept_result = listener.accept() => {
+ let (socket, peer_addr) = accept_result?; // NOTE(review): `?` makes any accept() error return from `task_main`, ending the accept loop
+ info!("accepted postgres client connection from {peer_addr}");
+
+ let session_id = uuid::Uuid::new_v4();
+ let tls_config = Arc::clone(&tls_config);
+ let dest_suffix = Arc::clone(&dest_suffix);
+
+ connections.spawn(
+ async move {
+ info!("spawned a task for {peer_addr}");
+
+ socket
+ .set_nodelay(true)
+ .context("failed to set socket option")?;
+
+ handle_client(dest_suffix, tls_config, session_id, socket).await
+ }
+ .unwrap_or_else(|e| {
+ // Acknowledge that the task has finished with an error.
+ error!("per-client task finished with an error: {e:#}");
+ }),
+ );
+ }
+ _ = cancellation_token.cancelled() => {
+ drop(listener);
+ break;
+ }
+ }
+ }
+
+ // Drain connections: wait for every spawned per-client task to finish
+ info!("waiting for all client connections to finish");
+ while let Some(res) = connections.join_next().await {
+ if let Err(e) = res {
+ if !e.is_panic() && !e.is_cancelled() {
+ warn!("unexpected error from joined connection task: {e:?}");
+ }
+ }
+ }
+ info!("all client connections have finished");
+ Ok(())
+}
+
+const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)";
+
+async fn ssl_handshake(
+ raw_stream: S,
+ tls_config: Arc,
+) -> anyhow::Result> {
+ let mut stream = PqStream::new(Stream::from_raw(raw_stream));
+
+ let msg = stream.read_startup_packet().await?;
+ info!("received {msg:?}");
+ use pq_proto::FeStartupPacket::*;
+
+ match msg {
+ SslRequest => {
+ stream
+ .write_message(&pq_proto::BeMessage::EncryptionResponse(true))
+ .await?;
+ // Upgrade raw stream into a secure TLS-backed stream.
+ // NOTE(review): no `tls` binding exists here; `tls_config` is moved into `upgrade` below.
+
+ let (raw, read_buf) = stream.into_inner();
+ // TODO: Normally, client doesn't send any data before
+ // server says TLS handshake is ok and read_buf is empty.
+ // However, you could imagine pipelining of postgres
+ // SSLRequest + TLS ClientHello in one hunk similar to
+ // pipelining in our node js driver. We should probably
+ // support that by chaining read_buf with the stream.
+ if !read_buf.is_empty() {
+ bail!("data is sent before server replied with EncryptionResponse");
+ }
+ Ok(raw.upgrade(tls_config).await?)
+ }
+ _ => stream.throw_error_str(ERR_INSECURE_CONNECTION).await?,
+ }
+}
+
+#[tracing::instrument(fields(session_id = ?session_id), skip_all)]
+async fn handle_client(
+ dest_suffix: Arc,
+ tls_config: Arc,
+ session_id: uuid::Uuid,
+ stream: impl AsyncRead + AsyncWrite + Unpin,
+) -> anyhow::Result<()> {
+ let tls_stream = ssl_handshake(stream, tls_config).await?;
+
+ // The first label of the SNI hostname (text before the first '.') encodes the target as
+ // `{k8s_service_name}--{k8s_namespace}--{port}`; the rest of the hostname is discarded.
+ // NOTE(review): `dest[2]` below panics if that label has fewer than three "--"-separated parts.
+ let sni = tls_stream.sni_hostname().ok_or(anyhow!("SNI missing"))?;
+ let dest: Vec<&str> = sni
+ .split_once('.')
+ .context("invalid SNI")?
+ .0
+ .splitn(3, "--")
+ .collect();
+ let port = dest[2].parse::().context("invalid port")?;
+ let destination = format!("{}.{}.{}:{}", dest[0], dest[1], dest_suffix, port);
+
+ info!("destination: {}", destination);
+
+ let client = tokio::net::TcpStream::connect(destination).await?;
+
+ let metrics_aux: MetricsAuxInfo = Default::default();
+ proxy::proxy::proxy_pass(tls_stream, client, &metrics_aux).await
+}
diff --git a/proxy/src/main.rs b/proxy/src/bin/proxy.rs
similarity index 79%
rename from proxy/src/main.rs
rename to proxy/src/bin/proxy.rs
index 1fd13c9f68..28e6e25317 100644
--- a/proxy/src/main.rs
+++ b/proxy/src/bin/proxy.rs
@@ -1,49 +1,23 @@
-//! Postgres protocol proxy/router.
-//!
-//! This service listens psql port and can check auth via external service
-//! (control plane API in our case) and can create new databases and accounts
-//! in somewhat transparent manner (again via communication with control plane API).
+use proxy::auth;
+use proxy::console;
+use proxy::http;
+use proxy::metrics;
-mod auth;
-mod cache;
-mod cancellation;
-mod compute;
-mod config;
-mod console;
-mod error;
-mod http;
-mod logging;
-mod metrics;
-mod parse;
-mod proxy;
-mod sasl;
-mod scram;
-mod stream;
-mod url;
-mod waiters;
-
-use anyhow::{bail, Context};
+use anyhow::bail;
use clap::{self, Arg};
-use config::ProxyConfig;
-use futures::FutureExt;
-use std::{borrow::Cow, future::Future, net::SocketAddr};
-use tokio::{net::TcpListener, task::JoinError};
+use proxy::config::{self, ProxyConfig};
+use std::{borrow::Cow, net::SocketAddr};
+use tokio::net::TcpListener;
use tokio_util::sync::CancellationToken;
-use tracing::{info, warn};
+use tracing::info;
+use tracing::warn;
use utils::{project_git_version, sentry_init::init_sentry};
project_git_version!(GIT_VERSION);
-/// Flattens `Result>` into `Result`.
-async fn flatten_err(
- f: impl Future