mirror of
https://github.com/neondatabase/neon.git
synced 2026-07-03 12:10:36 +00:00
Compare commits
9 Commits
problame/r
...
proxy-skip
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6062cd180c | ||
|
|
d2a6cd1182 | ||
|
|
75bfd22084 | ||
|
|
3cb3dce34e | ||
|
|
c7e43f40ba | ||
|
|
16d80128ee | ||
|
|
2ba414525e | ||
|
|
46210035c5 | ||
|
|
81892199f6 |
8
.github/workflows/benchmarking.yml
vendored
8
.github/workflows/benchmarking.yml
vendored
@@ -99,7 +99,7 @@ jobs:
|
||||
# Set --sparse-ordering option of pytest-order plugin
|
||||
# to ensure tests are running in order of appears in the file.
|
||||
# It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
|
||||
extra_params: -m remote_cluster --sparse-ordering --timeout 5400 --ignore test_runner/performance/test_perf_olap.py
|
||||
extra_params: -m remote_cluster --sparse-ordering --timeout 5400 --ignore test_runner/performance/test_perf_olap.py --ignore test_runner/performance/test_perf_pgvector_queries.py
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
@@ -410,14 +410,14 @@ jobs:
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
|
||||
- name: Benchmark pgvector hnsw queries
|
||||
- name: Benchmark pgvector queries
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
test_selection: performance
|
||||
test_selection: performance/test_perf_pgvector_queries.py
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_pgvector
|
||||
extra_params: -m remote_cluster --timeout 21600
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
|
||||
56
.github/workflows/build_and_test.yml
vendored
56
.github/workflows/build_and_test.yml
vendored
@@ -337,34 +337,8 @@ jobs:
|
||||
run: |
|
||||
${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins --tests
|
||||
|
||||
- name: Run rust tests
|
||||
env:
|
||||
NEXTEST_RETRIES: 3
|
||||
run: |
|
||||
#nextest does not yet support running doctests
|
||||
cargo test --doc $CARGO_FLAGS $CARGO_FEATURES
|
||||
|
||||
for io_engine in std-fs tokio-epoll-uring ; do
|
||||
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES
|
||||
done
|
||||
|
||||
# Run separate tests for real S3
|
||||
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
|
||||
export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
|
||||
export REMOTE_STORAGE_S3_REGION=eu-central-1
|
||||
# Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
|
||||
${cov_prefix} cargo nextest run $CARGO_FLAGS -E 'package(remote_storage)' -E 'test(test_real_s3)'
|
||||
|
||||
# Run separate tests for real Azure Blob Storage
|
||||
# XXX: replace region with `eu-central-1`-like region
|
||||
export ENABLE_REAL_AZURE_REMOTE_STORAGE=y
|
||||
export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}"
|
||||
export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
|
||||
export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
|
||||
export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
|
||||
# Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
|
||||
${cov_prefix} cargo nextest run $CARGO_FLAGS -E 'package(remote_storage)' -E 'test(test_real_azure)'
|
||||
|
||||
# Do install *before* running rust tests because they might recompile the
|
||||
# binaries with different features/flags.
|
||||
- name: Install rust binaries
|
||||
run: |
|
||||
# Install target binaries
|
||||
@@ -405,6 +379,32 @@ jobs:
|
||||
done
|
||||
fi
|
||||
|
||||
- name: Run rust tests
|
||||
env:
|
||||
NEXTEST_RETRIES: 3
|
||||
run: |
|
||||
#nextest does not yet support running doctests
|
||||
cargo test --doc $CARGO_FLAGS $CARGO_FEATURES
|
||||
|
||||
for io_engine in std-fs tokio-epoll-uring ; do
|
||||
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES
|
||||
done
|
||||
|
||||
# Run separate tests for real S3
|
||||
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
|
||||
export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
|
||||
export REMOTE_STORAGE_S3_REGION=eu-central-1
|
||||
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_s3)'
|
||||
|
||||
# Run separate tests for real Azure Blob Storage
|
||||
# XXX: replace region with `eu-central-1`-like region
|
||||
export ENABLE_REAL_AZURE_REMOTE_STORAGE=y
|
||||
export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}"
|
||||
export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
|
||||
export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
|
||||
export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
|
||||
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_azure)'
|
||||
|
||||
- name: Install postgres binaries
|
||||
run: cp -a pg_install /tmp/neon/pg_install
|
||||
|
||||
|
||||
8
Cargo.lock
generated
8
Cargo.lock
generated
@@ -4002,7 +4002,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres"
|
||||
version = "0.19.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=skip-auth-1rtt#42784ef44fe62b6edca9813ca47bfc1c52c60a73"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
@@ -4015,7 +4015,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres-protocol"
|
||||
version = "0.6.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=skip-auth-1rtt#42784ef44fe62b6edca9813ca47bfc1c52c60a73"
|
||||
dependencies = [
|
||||
"base64 0.20.0",
|
||||
"byteorder",
|
||||
@@ -4034,7 +4034,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres-types"
|
||||
version = "0.2.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=skip-auth-1rtt#42784ef44fe62b6edca9813ca47bfc1c52c60a73"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
@@ -6204,7 +6204,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "tokio-postgres"
|
||||
version = "0.7.7"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=skip-auth-1rtt#42784ef44fe62b6edca9813ca47bfc1c52c60a73"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
|
||||
10
Cargo.toml
10
Cargo.toml
@@ -198,10 +198,10 @@ env_logger = "0.10"
|
||||
log = "0.4"
|
||||
|
||||
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
|
||||
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="skip-auth-1rtt" }
|
||||
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch="skip-auth-1rtt" }
|
||||
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="skip-auth-1rtt" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="skip-auth-1rtt" }
|
||||
|
||||
## Other git libraries
|
||||
heapless = { default-features=false, features=[], git = "https://github.com/japaric/heapless.git", rev = "644653bf3b831c6bb4963be2de24804acf5e5001" } # upstream release pending
|
||||
@@ -240,7 +240,7 @@ tonic-build = "0.9"
|
||||
[patch.crates-io]
|
||||
|
||||
# Needed to get `tokio-postgres-rustls` to depend on our fork.
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="skip-auth-1rtt" }
|
||||
|
||||
# bug fixes for UUID
|
||||
parquet = { git = "https://github.com/apache/arrow-rs", branch = "master" }
|
||||
|
||||
@@ -1,139 +0,0 @@
|
||||
# Postgres Bundle for Pageserver
|
||||
|
||||
Created on 2024-06-17
|
||||
|
||||
## Summary
|
||||
|
||||
This RFC defines the responsibilities of Compute and Storage team regarding the
|
||||
build & deployment of the Postgres code that Pageserver must run
|
||||
(`initdb`, `postgres --wal-redo`).
|
||||
|
||||
## Motivation
|
||||
|
||||
Pageserver has to run Postgres binaries to do its job, specifically
|
||||
|
||||
* `initdb`
|
||||
* `postgres --wal-redo` mode
|
||||
|
||||
Currently there is **no clear ownership** of
|
||||
* how these binaries are built
|
||||
* including, critically, dynamic linkage against other libraries such as `libicu`
|
||||
* what build of the binaries ends up running on Pageservers
|
||||
* how the binaries and runtime dependencies (e.g., shared libraries) are delivered to Pageservers
|
||||
|
||||
Further, these binaries have dependencies (e.g., libicu) which
|
||||
1. prevent the Storage team from switching Pageserver distro and/or version, and
|
||||
2. some dependencies impact compatibility between Storage and Compute (e.g., [libicu version impacts collation incompatibilty](https://github.com/neondatabase/neon/pull/8074))
|
||||
3. some dependencies can cause database corruption if updated carelessly (locale => libc)
|
||||
|
||||
## Why Is This Worth Solving
|
||||
|
||||
1. Clearly defined ownership generally boosts execution speed & bug triage.
|
||||
* Example for why execution speed matters: CVE in dependency => who takes care of patching & updating.
|
||||
2. Centralize understanding of risks involved with some dependencies.
|
||||
Currently, there is no team clearly responsible for assessing / tracking the risks. As a reminder from previous section, these are
|
||||
* runtime incompatibilities
|
||||
* database corruption
|
||||
|
||||
Also, it is an unlock for additional future value, see "Future Work" section.
|
||||
|
||||
## Impacted components (e.g. pageserver, safekeeper, console, etc)
|
||||
|
||||
Pageserver (neon.git)
|
||||
Compute (neon.git)
|
||||
Deployment process (aws.git)
|
||||
|
||||
## Design
|
||||
|
||||
The basic interface between Compute and Storage team is as follows:
|
||||
|
||||
* Compute team publishes a "bundle" of the binaries required by Pageserver
|
||||
* Storage team uses a pinned bundle in the Pageserver build process
|
||||
* Storage team code review is required to update the pinned version
|
||||
|
||||
The "bundle" provides an interface agreed upon by Compute and Storage teams to run
|
||||
* for each supported Postgres version at Neon (v14, v15, v16, ...)
|
||||
* the `initdb` process
|
||||
* behaving like a vanilla Postgres `initdb`
|
||||
* `postgres --wal-redo` mode process
|
||||
* following the walredo protocol specified elsewhere
|
||||
|
||||
The bundle is self-contained, i.e., it behaves the same way on any Linux system.
|
||||
The only ambient runtime dependency is the Linux kernel.
|
||||
The minimum Linux kernel version is 5.10.
|
||||
|
||||
### Variant 1: bundle = fully statically linked binaries
|
||||
The "bundle" is a tarball of fully statically linked binaries
|
||||
|
||||
```
|
||||
v14/initdb
|
||||
v14/postgres
|
||||
v15/initdb
|
||||
v15/postgres
|
||||
v16/initdb
|
||||
v16/postgres
|
||||
...
|
||||
```
|
||||
|
||||
The directory structure is part of the interface.
|
||||
|
||||
### Variant 2: bundle = chrooted directory
|
||||
|
||||
The "bundle" is a tarball that contains all sorts of files, plus a launcher script.
|
||||
|
||||
```
|
||||
LAUNCHER
|
||||
storage
|
||||
storage/does
|
||||
storage/does/not
|
||||
storage/does/not/care
|
||||
```
|
||||
|
||||
To launch `initdb` or `postgres --wal-redo`, the Pageserver does
|
||||
1. fork child process
|
||||
2. `chroot` into the extracted directory
|
||||
3. inside the chroot, run `/LAUNCHER VERSION PG_BINARY [FLAGS...]`
|
||||
4. The `LAUNCHER` script sets up library search paths, etc, and then `exec`s the correct binary
|
||||
|
||||
We acknowledge this is half-way reinventing OCI + linux containers.
|
||||
However, our needs are much simpler than what OCI & Docker provide.
|
||||
Specifically, we do not want Pageserver to be runtime-dependent on e.g. Docker as the launcher.
|
||||
|
||||
The `chroot` is to enforce that the "bundle" be self-contained.
|
||||
The special path `/inout` int he bundle is reserved, e.g., for `initdb` output.
|
||||
|
||||
### Variant 3: ???
|
||||
|
||||
Your design here, feedback welcome.
|
||||
|
||||
## Security implications
|
||||
|
||||
It's an improvement because a single team (Compute) will be responsible for runtime dependencies.
|
||||
|
||||
## Implementation & Rollout
|
||||
|
||||
Storage and Compute teams agree on a bundle definition.
|
||||
|
||||
Compute team changes their build process to produce both
|
||||
1. existing: compute image / vm compute image
|
||||
2. existing: pg_install tarball (currently built by `neon.git:Dockerfile`)
|
||||
2. new: the bundle
|
||||
|
||||
Storage makes `neon.git` Pageserver changes to support using bundle (behind feature flag).
|
||||
With feature flag disabled, existing `pg_install` tarball is used instead.
|
||||
|
||||
Storage & infra make `aws.git` changes to deploy bundle to pageservers, with feature flag disabled.
|
||||
|
||||
Storage team does gradual rollout.
|
||||
|
||||
Storage & infra teams remove support for `pg_install`, delete it from the nodes (experimentation in staging to ensure no hidden runtime deps!)
|
||||
|
||||
Compute team stops producing `pg_install` tarball.
|
||||
|
||||
|
||||
## Future Work
|
||||
|
||||
We know that we can easily make pageserver fully statically linked.
|
||||
Together with the self-contained "bundle" proposed above, Pageserver can then be deployed to different OSes.
|
||||
For example, we have been entertaining the idea of trying Amazon Linux instead of Debian for Pageserver.
|
||||
That experiment would be a lot simpler.
|
||||
@@ -558,6 +558,12 @@ impl KeySpaceRandomAccum {
|
||||
self.ranges.push(range);
|
||||
}
|
||||
|
||||
pub fn add_keyspace(&mut self, keyspace: KeySpace) {
|
||||
for range in keyspace.ranges {
|
||||
self.add_range(range);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_keyspace(mut self) -> KeySpace {
|
||||
let mut ranges = Vec::new();
|
||||
if !self.ranges.is_empty() {
|
||||
|
||||
@@ -144,20 +144,7 @@ impl PgConnectionConfig {
|
||||
// implement and this function is hardly a bottleneck. The function is only called around
|
||||
// establishing a new connection.
|
||||
#[allow(unstable_name_collisions)]
|
||||
config.options(
|
||||
&self
|
||||
.options
|
||||
.iter()
|
||||
.map(|s| {
|
||||
if s.contains(['\\', ' ']) {
|
||||
Cow::Owned(s.replace('\\', "\\\\").replace(' ', "\\ "))
|
||||
} else {
|
||||
Cow::Borrowed(s.as_str())
|
||||
}
|
||||
})
|
||||
.intersperse(Cow::Borrowed(" ")) // TODO: use impl from std once it's stabilized
|
||||
.collect::<String>(),
|
||||
);
|
||||
config.options(&encode_options(&self.options));
|
||||
}
|
||||
config
|
||||
}
|
||||
@@ -178,6 +165,21 @@ impl PgConnectionConfig {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(unstable_name_collisions)]
|
||||
fn encode_options(options: &[String]) -> String {
|
||||
options
|
||||
.iter()
|
||||
.map(|s| {
|
||||
if s.contains(['\\', ' ']) {
|
||||
Cow::Owned(s.replace('\\', "\\\\").replace(' ', "\\ "))
|
||||
} else {
|
||||
Cow::Borrowed(s.as_str())
|
||||
}
|
||||
})
|
||||
.intersperse(Cow::Borrowed(" ")) // TODO: use impl from std once it's stabilized
|
||||
.collect::<String>()
|
||||
}
|
||||
|
||||
impl fmt::Display for PgConnectionConfig {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
// The password is intentionally hidden and not part of this display string.
|
||||
@@ -206,7 +208,7 @@ impl fmt::Debug for PgConnectionConfig {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests_pg_connection_config {
|
||||
use crate::PgConnectionConfig;
|
||||
use crate::{encode_options, PgConnectionConfig};
|
||||
use once_cell::sync::Lazy;
|
||||
use url::Host;
|
||||
|
||||
@@ -255,18 +257,12 @@ mod tests_pg_connection_config {
|
||||
|
||||
#[test]
|
||||
fn test_with_options() {
|
||||
let cfg = PgConnectionConfig::new_host_port(STUB_HOST.clone(), 123).extend_options([
|
||||
"hello",
|
||||
"world",
|
||||
"with space",
|
||||
"and \\ backslashes",
|
||||
let options = encode_options(&[
|
||||
"hello".to_owned(),
|
||||
"world".to_owned(),
|
||||
"with space".to_owned(),
|
||||
"and \\ backslashes".to_owned(),
|
||||
]);
|
||||
assert_eq!(cfg.host(), &*STUB_HOST);
|
||||
assert_eq!(cfg.port(), 123);
|
||||
assert_eq!(cfg.raw_address(), "stub.host.example:123");
|
||||
assert_eq!(
|
||||
cfg.to_tokio_postgres_config().get_options(),
|
||||
Some("hello world with\\ space and\\ \\\\\\ backslashes")
|
||||
);
|
||||
assert_eq!(options, "hello world with\\ space and\\ \\\\\\ backslashes");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -318,7 +318,7 @@ pub(crate) struct LayerFringe {
|
||||
#[derive(Debug)]
|
||||
struct LayerKeyspace {
|
||||
layer: ReadableLayer,
|
||||
target_keyspace: Vec<KeySpace>,
|
||||
target_keyspace: KeySpaceRandomAccum,
|
||||
}
|
||||
|
||||
impl LayerFringe {
|
||||
@@ -342,17 +342,13 @@ impl LayerFringe {
|
||||
_,
|
||||
LayerKeyspace {
|
||||
layer,
|
||||
target_keyspace,
|
||||
mut target_keyspace,
|
||||
},
|
||||
)) => {
|
||||
let mut keyspace = KeySpaceRandomAccum::new();
|
||||
for ks in target_keyspace {
|
||||
for part in ks.ranges {
|
||||
keyspace.add_range(part);
|
||||
}
|
||||
}
|
||||
Some((layer, keyspace.consume_keyspace(), read_desc.lsn_range))
|
||||
}
|
||||
)) => Some((
|
||||
layer,
|
||||
target_keyspace.consume_keyspace(),
|
||||
read_desc.lsn_range,
|
||||
)),
|
||||
None => unreachable!("fringe internals are always consistent"),
|
||||
}
|
||||
}
|
||||
@@ -367,16 +363,18 @@ impl LayerFringe {
|
||||
let entry = self.layers.entry(layer_id.clone());
|
||||
match entry {
|
||||
Entry::Occupied(mut entry) => {
|
||||
entry.get_mut().target_keyspace.push(keyspace);
|
||||
entry.get_mut().target_keyspace.add_keyspace(keyspace);
|
||||
}
|
||||
Entry::Vacant(entry) => {
|
||||
self.planned_reads_by_lsn.push(ReadDesc {
|
||||
lsn_range,
|
||||
layer_id: layer_id.clone(),
|
||||
});
|
||||
let mut accum = KeySpaceRandomAccum::new();
|
||||
accum.add_keyspace(keyspace);
|
||||
entry.insert(LayerKeyspace {
|
||||
layer,
|
||||
target_keyspace: vec![keyspace],
|
||||
target_keyspace: accum,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -103,12 +103,8 @@ impl ConnCfg {
|
||||
|
||||
/// Reuse password or auth keys from the other config.
|
||||
pub fn reuse_password(&mut self, other: Self) {
|
||||
if let Some(password) = other.get_password() {
|
||||
self.password(password);
|
||||
}
|
||||
|
||||
if let Some(keys) = other.get_auth_keys() {
|
||||
self.auth_keys(keys);
|
||||
if let Some(password) = other.get_auth() {
|
||||
self.auth(password);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -124,48 +120,64 @@ impl ConnCfg {
|
||||
|
||||
/// Apply startup message params to the connection config.
|
||||
pub fn set_startup_params(&mut self, params: &StartupMessageParams) {
|
||||
// Only set `user` if it's not present in the config.
|
||||
// Link auth flow takes username from the console's response.
|
||||
if let (None, Some(user)) = (self.get_user(), params.get("user")) {
|
||||
self.user(user);
|
||||
}
|
||||
|
||||
// Only set `dbname` if it's not present in the config.
|
||||
// Link auth flow takes dbname from the console's response.
|
||||
if let (None, Some(dbname)) = (self.get_dbname(), params.get("database")) {
|
||||
self.dbname(dbname);
|
||||
}
|
||||
|
||||
// Don't add `options` if they were only used for specifying a project.
|
||||
// Connection pools don't support `options`, because they affect backend startup.
|
||||
if let Some(options) = filtered_options(params) {
|
||||
self.options(&options);
|
||||
}
|
||||
|
||||
if let Some(app_name) = params.get("application_name") {
|
||||
self.application_name(app_name);
|
||||
}
|
||||
|
||||
// TODO: This is especially ugly...
|
||||
if let Some(replication) = params.get("replication") {
|
||||
use tokio_postgres::config::ReplicationMode;
|
||||
match replication {
|
||||
"true" | "on" | "yes" | "1" => {
|
||||
self.replication_mode(ReplicationMode::Physical);
|
||||
let mut client_encoding = false;
|
||||
for (k, v) in params.iter() {
|
||||
match k {
|
||||
"user" => {
|
||||
// Only set `user` if it's not present in the config.
|
||||
// Link auth flow takes username from the console's response.
|
||||
if self.get_user().is_none() {
|
||||
self.user(v);
|
||||
}
|
||||
}
|
||||
"database" => {
|
||||
self.replication_mode(ReplicationMode::Logical);
|
||||
// Only set `dbname` if it's not present in the config.
|
||||
// Link auth flow takes dbname from the console's response.
|
||||
if self.get_dbname().is_none() {
|
||||
self.dbname(v);
|
||||
}
|
||||
}
|
||||
"options" => {
|
||||
// Don't add `options` if they were only used for specifying a project.
|
||||
// Connection pools don't support `options`, because they affect backend startup.
|
||||
if let Some(options) = filtered_options(v) {
|
||||
self.options(&options);
|
||||
}
|
||||
}
|
||||
|
||||
// the special ones in tokio-postgres that we don't want being set by the user
|
||||
"dbname" => {}
|
||||
"password" => {}
|
||||
"sslmode" => {}
|
||||
"host" => {}
|
||||
"port" => {}
|
||||
"connect_timeout" => {}
|
||||
"keepalives" => {}
|
||||
"keepalives_idle" => {}
|
||||
"keepalives_interval" => {}
|
||||
"keepalives_retries" => {}
|
||||
"target_session_attrs" => {}
|
||||
"channel_binding" => {}
|
||||
"max_backend_message_size" => {}
|
||||
|
||||
"client_encoding" => {
|
||||
client_encoding = true;
|
||||
// only error should be from bad null bytes,
|
||||
// but we've already checked for those.
|
||||
_ = self.param("client_encoding", v);
|
||||
}
|
||||
|
||||
_ => {
|
||||
// only error should be from bad null bytes,
|
||||
// but we've already checked for those.
|
||||
_ = self.param(k, v);
|
||||
}
|
||||
_other => {}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: extend the list of the forwarded startup parameters.
|
||||
// Currently, tokio-postgres doesn't allow us to pass
|
||||
// arbitrary parameters, but the ones above are a good start.
|
||||
//
|
||||
// This and the reverse params problem can be better addressed
|
||||
// in a bespoke connection machinery (a new library for that sake).
|
||||
if !client_encoding {
|
||||
// for compatibility since we removed it from tokio-postgres
|
||||
self.param("client_encoding", "UTF8").unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -338,10 +350,9 @@ impl ConnCfg {
|
||||
}
|
||||
|
||||
/// Retrieve `options` from a startup message, dropping all proxy-secific flags.
|
||||
fn filtered_options(params: &StartupMessageParams) -> Option<String> {
|
||||
fn filtered_options(options: &str) -> Option<String> {
|
||||
#[allow(unstable_name_collisions)]
|
||||
let options: String = params
|
||||
.options_raw()?
|
||||
let options: String = StartupMessageParams::parse_options_raw(options)
|
||||
.filter(|opt| parse_endpoint_param(opt).is_none() && neon_option(opt).is_none())
|
||||
.intersperse(" ") // TODO: use impl from std once it's stabilized
|
||||
.collect();
|
||||
@@ -413,27 +424,23 @@ mod tests {
|
||||
#[test]
|
||||
fn test_filtered_options() {
|
||||
// Empty options is unlikely to be useful anyway.
|
||||
let params = StartupMessageParams::new([("options", "")]);
|
||||
assert_eq!(filtered_options(¶ms), None);
|
||||
assert_eq!(filtered_options(""), None);
|
||||
|
||||
// It's likely that clients will only use options to specify endpoint/project.
|
||||
let params = StartupMessageParams::new([("options", "project=foo")]);
|
||||
assert_eq!(filtered_options(¶ms), None);
|
||||
let params = "project=foo";
|
||||
assert_eq!(filtered_options(params), None);
|
||||
|
||||
// Same, because unescaped whitespaces are no-op.
|
||||
let params = StartupMessageParams::new([("options", " project=foo ")]);
|
||||
assert_eq!(filtered_options(¶ms).as_deref(), None);
|
||||
let params = " project=foo ";
|
||||
assert_eq!(filtered_options(params), None);
|
||||
|
||||
let params = StartupMessageParams::new([("options", r"\ project=foo \ ")]);
|
||||
assert_eq!(filtered_options(¶ms).as_deref(), Some(r"\ \ "));
|
||||
let params = r"\ project=foo \ ";
|
||||
assert_eq!(filtered_options(params).as_deref(), Some(r"\ \ "));
|
||||
|
||||
let params = StartupMessageParams::new([("options", "project = foo")]);
|
||||
assert_eq!(filtered_options(¶ms).as_deref(), Some("project = foo"));
|
||||
let params = "project = foo";
|
||||
assert_eq!(filtered_options(params).as_deref(), Some("project = foo"));
|
||||
|
||||
let params = StartupMessageParams::new([(
|
||||
"options",
|
||||
"project = foo neon_endpoint_type:read_write neon_lsn:0/2",
|
||||
)]);
|
||||
assert_eq!(filtered_options(¶ms).as_deref(), Some("project = foo"));
|
||||
let params = "project = foo neon_endpoint_type:read_write neon_lsn:0/2";
|
||||
assert_eq!(filtered_options(params).as_deref(), Some("project = foo"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -231,6 +231,10 @@ impl ConnectMechanism for TokioMechanism {
|
||||
.dbname(&self.conn_info.dbname)
|
||||
.connect_timeout(timeout);
|
||||
|
||||
config
|
||||
.param("client_encoding", "UTF8")
|
||||
.expect("client encoding UTF8 is always valid");
|
||||
|
||||
let pause = ctx.latency_timer.pause(crate::metrics::Waiting::Compute);
|
||||
let res = config.connect(tokio_postgres::NoTls).await;
|
||||
drop(pause);
|
||||
|
||||
@@ -202,6 +202,7 @@ fn get_conn_info(
|
||||
options = Some(NeonOptions::parse_options_raw(&value));
|
||||
}
|
||||
}
|
||||
ctx.set_db_options(params.freeze());
|
||||
|
||||
let user_info = ComputeUserInfo {
|
||||
endpoint,
|
||||
|
||||
@@ -31,6 +31,7 @@ pub(crate) enum PageserverState {
|
||||
Available {
|
||||
last_seen_at: Instant,
|
||||
utilization: PageserverUtilization,
|
||||
new: bool,
|
||||
},
|
||||
Offline,
|
||||
}
|
||||
@@ -127,6 +128,7 @@ impl HeartbeaterTask {
|
||||
heartbeat_futs.push({
|
||||
let jwt_token = self.jwt_token.clone();
|
||||
let cancel = self.cancel.clone();
|
||||
let new_node = !self.state.contains_key(node_id);
|
||||
|
||||
// Clone the node and mark it as available such that the request
|
||||
// goes through to the pageserver even when the node is marked offline.
|
||||
@@ -159,6 +161,7 @@ impl HeartbeaterTask {
|
||||
PageserverState::Available {
|
||||
last_seen_at: Instant::now(),
|
||||
utilization,
|
||||
new: new_node,
|
||||
}
|
||||
} else {
|
||||
PageserverState::Offline
|
||||
@@ -220,6 +223,7 @@ impl HeartbeaterTask {
|
||||
}
|
||||
},
|
||||
Vacant(_) => {
|
||||
// This is a new node. Don't generate a delta for it.
|
||||
deltas.push((node_id, ps_state.clone()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ use std::{str::FromStr, time::Duration};
|
||||
use pageserver_api::{
|
||||
controller_api::{
|
||||
NodeAvailability, NodeDescribeResponse, NodeRegisterRequest, NodeSchedulingPolicy,
|
||||
TenantLocateResponseShard,
|
||||
TenantLocateResponseShard, UtilizationScore,
|
||||
},
|
||||
shard::TenantShardId,
|
||||
};
|
||||
@@ -116,6 +116,16 @@ impl Node {
|
||||
match (self.availability, availability) {
|
||||
(Offline, Active(_)) => ToActive,
|
||||
(Active(_), Offline) => ToOffline,
|
||||
// Consider the case when the storage controller handles the re-attach of a node
|
||||
// before the heartbeats detect that the node is back online. We still need
|
||||
// [`Service::node_configure`] to attempt reconciliations for shards with an
|
||||
// unknown observed location.
|
||||
// The unsavoury match arm below handles this situation.
|
||||
(Active(lhs), Active(rhs))
|
||||
if lhs == UtilizationScore::worst() && rhs < UtilizationScore::worst() =>
|
||||
{
|
||||
ToActive
|
||||
}
|
||||
_ => Unchanged,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ use crate::{
|
||||
id_lock_map::{trace_exclusive_lock, trace_shared_lock, IdLockMap, WrappedWriteGuard},
|
||||
persistence::{AbortShardSplitStatus, TenantFilter},
|
||||
reconciler::{ReconcileError, ReconcileUnits},
|
||||
scheduler::{ScheduleContext, ScheduleMode},
|
||||
scheduler::{MaySchedule, ScheduleContext, ScheduleMode},
|
||||
tenant_shard::{
|
||||
MigrateAttachment, ReconcileNeeded, ScheduleOptimization, ScheduleOptimizationAction,
|
||||
},
|
||||
@@ -747,29 +747,61 @@ impl Service {
|
||||
let res = self.heartbeater.heartbeat(nodes).await;
|
||||
if let Ok(deltas) = res {
|
||||
for (node_id, state) in deltas.0 {
|
||||
let new_availability = match state {
|
||||
PageserverState::Available { utilization, .. } => NodeAvailability::Active(
|
||||
UtilizationScore(utilization.utilization_score),
|
||||
let (new_node, new_availability) = match state {
|
||||
PageserverState::Available {
|
||||
utilization, new, ..
|
||||
} => (
|
||||
new,
|
||||
NodeAvailability::Active(UtilizationScore(
|
||||
utilization.utilization_score,
|
||||
)),
|
||||
),
|
||||
PageserverState::Offline => NodeAvailability::Offline,
|
||||
PageserverState::Offline => (false, NodeAvailability::Offline),
|
||||
};
|
||||
let res = self
|
||||
.node_configure(node_id, Some(new_availability), None)
|
||||
.await;
|
||||
|
||||
match res {
|
||||
Ok(()) => {}
|
||||
Err(ApiError::NotFound(_)) => {
|
||||
// This should be rare, but legitimate since the heartbeats are done
|
||||
// on a snapshot of the nodes.
|
||||
tracing::info!("Node {} was not found after heartbeat round", node_id);
|
||||
if new_node {
|
||||
// When the heartbeats detect a newly added node, we don't wish
|
||||
// to attempt to reconcile the shards assigned to it. The node
|
||||
// is likely handling it's re-attach response, so reconciling now
|
||||
// would be counterproductive.
|
||||
//
|
||||
// Instead, update the in-memory state with the details learned about the
|
||||
// node.
|
||||
let mut locked = self.inner.write().unwrap();
|
||||
let (nodes, _tenants, scheduler) = locked.parts_mut();
|
||||
|
||||
let mut new_nodes = (**nodes).clone();
|
||||
|
||||
if let Some(node) = new_nodes.get_mut(&node_id) {
|
||||
node.set_availability(new_availability);
|
||||
scheduler.node_upsert(node);
|
||||
}
|
||||
Err(err) => {
|
||||
tracing::error!(
|
||||
"Failed to update node {} after heartbeat round: {}",
|
||||
node_id,
|
||||
err
|
||||
);
|
||||
|
||||
locked.nodes = Arc::new(new_nodes);
|
||||
} else {
|
||||
// This is the code path for geniune availability transitions (i.e node
|
||||
// goes unavailable and/or comes back online).
|
||||
let res = self
|
||||
.node_configure(node_id, Some(new_availability), None)
|
||||
.await;
|
||||
|
||||
match res {
|
||||
Ok(()) => {}
|
||||
Err(ApiError::NotFound(_)) => {
|
||||
// This should be rare, but legitimate since the heartbeats are done
|
||||
// on a snapshot of the nodes.
|
||||
tracing::info!(
|
||||
"Node {} was not found after heartbeat round",
|
||||
node_id
|
||||
);
|
||||
}
|
||||
Err(err) => {
|
||||
tracing::error!(
|
||||
"Failed to update node {} after heartbeat round: {}",
|
||||
node_id,
|
||||
err
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -4316,6 +4348,16 @@ impl Service {
|
||||
continue;
|
||||
}
|
||||
|
||||
if !new_nodes
|
||||
.values()
|
||||
.any(|n| matches!(n.may_schedule(), MaySchedule::Yes(_)))
|
||||
{
|
||||
// Special case for when all nodes are unavailable and/or unschedulable: there is no point
|
||||
// trying to reschedule since there's nowhere else to go. Without this
|
||||
// branch we incorrectly detach tenants in response to node unavailability.
|
||||
continue;
|
||||
}
|
||||
|
||||
if tenant_shard.intent.demote_attached(scheduler, node_id) {
|
||||
tenant_shard.sequence = tenant_shard.sequence.next();
|
||||
|
||||
@@ -4353,6 +4395,12 @@ impl Service {
|
||||
// When a node comes back online, we must reconcile any tenant that has a None observed
|
||||
// location on the node.
|
||||
for tenant_shard in locked.tenants.values_mut() {
|
||||
// If a reconciliation is already in progress, rely on the previous scheduling
|
||||
// decision and skip triggering a new reconciliation.
|
||||
if tenant_shard.reconciler.is_some() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(observed_loc) = tenant_shard.observed.locations.get_mut(&node_id) {
|
||||
if observed_loc.conf.is_none() {
|
||||
self.maybe_reconcile_shard(tenant_shard, &new_nodes);
|
||||
|
||||
15
test_runner/performance/pgvector/halfvec_build.sql
Normal file
15
test_runner/performance/pgvector/halfvec_build.sql
Normal file
@@ -0,0 +1,15 @@
|
||||
DROP TABLE IF EXISTS halfvec_test_table;
|
||||
|
||||
CREATE TABLE halfvec_test_table (
|
||||
_id text NOT NULL,
|
||||
title text,
|
||||
text text,
|
||||
embeddings halfvec(1536),
|
||||
PRIMARY KEY (_id)
|
||||
);
|
||||
|
||||
INSERT INTO halfvec_test_table (_id, title, text, embeddings)
|
||||
SELECT _id, title, text, embeddings::halfvec
|
||||
FROM documents;
|
||||
|
||||
CREATE INDEX documents_half_precision_hnsw_idx ON halfvec_test_table USING hnsw (embeddings halfvec_cosine_ops) WITH (m = 64, ef_construction = 128);
|
||||
@@ -0,0 +1,13 @@
|
||||
-- run with pooled connection
|
||||
-- pgbench -T 300 -c 100 -j20 -f pgbench_halfvec_queries.sql -postgresql://neondb_owner:<secret>@ep-floral-thunder-w1gzhaxi-pooler.eu-west-1.aws.neon.build/neondb?sslmode=require"
|
||||
|
||||
with x (x) as (
|
||||
select "embeddings" as x
|
||||
from halfvec_test_table
|
||||
TABLESAMPLE SYSTEM (1)
|
||||
LIMIT 1
|
||||
)
|
||||
SELECT title, "embeddings" <=> (select x from x) as distance
|
||||
FROM halfvec_test_table
|
||||
ORDER BY 2
|
||||
LIMIT 30;
|
||||
@@ -1,13 +0,0 @@
|
||||
-- run with pooled connection
|
||||
-- pgbench -T 300 -c 100 -j20 -f pgbench_hnsw_queries.sql -postgresql://neondb_owner:<secret>@ep-floral-thunder-w1gzhaxi-pooler.eu-west-1.aws.neon.build/neondb?sslmode=require"
|
||||
|
||||
with x (x) as (
|
||||
select "embeddings" as x
|
||||
from hnsw_test_table
|
||||
TABLESAMPLE SYSTEM (1)
|
||||
LIMIT 1
|
||||
)
|
||||
SELECT title, "embeddings" <=> (select x from x) as distance
|
||||
FROM hnsw_test_table
|
||||
ORDER BY 2
|
||||
LIMIT 30;
|
||||
@@ -106,6 +106,7 @@ QUERIES: Tuple[LabelledQuery, ...] = (
|
||||
# Disable auto formatting for the list of queries so that it's easier to read
|
||||
# fmt: off
|
||||
PGVECTOR_QUERIES: Tuple[LabelledQuery, ...] = (
|
||||
LabelledQuery("PGVPREP", r"ALTER EXTENSION VECTOR UPDATE;"),
|
||||
LabelledQuery("PGV0", r"DROP TABLE IF EXISTS hnsw_test_table;"),
|
||||
LabelledQuery("PGV1", r"CREATE TABLE hnsw_test_table AS TABLE documents WITH NO DATA;"),
|
||||
LabelledQuery("PGV2", r"INSERT INTO hnsw_test_table SELECT * FROM documents;"),
|
||||
@@ -115,6 +116,10 @@ PGVECTOR_QUERIES: Tuple[LabelledQuery, ...] = (
|
||||
LabelledQuery("PGV6", r"CREATE INDEX ON hnsw_test_table USING hnsw (embeddings vector_l1_ops);"),
|
||||
LabelledQuery("PGV7", r"CREATE INDEX ON hnsw_test_table USING hnsw ((binary_quantize(embeddings)::bit(1536)) bit_hamming_ops);"),
|
||||
LabelledQuery("PGV8", r"CREATE INDEX ON hnsw_test_table USING hnsw ((binary_quantize(embeddings)::bit(1536)) bit_jaccard_ops);"),
|
||||
LabelledQuery("PGV9", r"DROP TABLE IF EXISTS halfvec_test_table;"),
|
||||
LabelledQuery("PGV10", r"CREATE TABLE halfvec_test_table (_id text NOT NULL, title text, text text, embeddings halfvec(1536), PRIMARY KEY (_id));"),
|
||||
LabelledQuery("PGV11", r"INSERT INTO halfvec_test_table (_id, title, text, embeddings) SELECT _id, title, text, embeddings::halfvec FROM documents;"),
|
||||
LabelledQuery("PGV12", r"CREATE INDEX documents_half_precision_hnsw_idx ON halfvec_test_table USING hnsw (embeddings halfvec_cosine_ops) WITH (m = 64, ef_construction = 128);"),
|
||||
)
|
||||
# fmt: on
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@ class PgBenchLoadType(enum.Enum):
|
||||
SIMPLE_UPDATE = "simple-update"
|
||||
SELECT_ONLY = "select-only"
|
||||
PGVECTOR_HNSW = "pgvector-hnsw"
|
||||
PGVECTOR_HALFVEC = "pgvector-halfvec"
|
||||
|
||||
|
||||
def utc_now_timestamp() -> int:
|
||||
@@ -153,6 +154,26 @@ def run_test_pgbench(env: PgCompare, scale: int, duration: int, workload_type: P
|
||||
password=password,
|
||||
)
|
||||
|
||||
if workload_type == PgBenchLoadType.PGVECTOR_HALFVEC:
|
||||
# Run simple-update workload
|
||||
run_pgbench(
|
||||
env,
|
||||
"pgvector-halfvec",
|
||||
[
|
||||
"pgbench",
|
||||
"-f",
|
||||
"test_runner/performance/pgvector/pgbench_custom_script_pgvector_halfvec_queries.sql",
|
||||
"-c100",
|
||||
"-j20",
|
||||
f"-T{duration}",
|
||||
"-P2",
|
||||
"--protocol=prepared",
|
||||
"--progress-timestamp",
|
||||
connstr,
|
||||
],
|
||||
password=password,
|
||||
)
|
||||
|
||||
env.report_size()
|
||||
|
||||
|
||||
@@ -222,13 +243,3 @@ def test_pgbench_remote_simple_update(remote_compare: PgCompare, scale: int, dur
|
||||
@pytest.mark.remote_cluster
|
||||
def test_pgbench_remote_select_only(remote_compare: PgCompare, scale: int, duration: int):
|
||||
run_test_pgbench(remote_compare, scale, duration, PgBenchLoadType.SELECT_ONLY)
|
||||
|
||||
|
||||
# The following test runs on an existing database that has pgvector extension installed
|
||||
# and a table with 1 million embedding vectors loaded and indexed with HNSW.
|
||||
#
|
||||
# Run this pgbench tests against an existing remote Postgres cluster with the necessary setup.
|
||||
@pytest.mark.parametrize("duration", get_durations_matrix())
|
||||
@pytest.mark.remote_cluster
|
||||
def test_pgbench_remote_pgvector(remote_compare: PgCompare, duration: int):
|
||||
run_test_pgbench(remote_compare, 1, duration, PgBenchLoadType.PGVECTOR_HNSW)
|
||||
|
||||
24
test_runner/performance/test_perf_pgvector_queries.py
Normal file
24
test_runner/performance/test_perf_pgvector_queries.py
Normal file
@@ -0,0 +1,24 @@
|
||||
import pytest
|
||||
from fixtures.compare_fixtures import PgCompare
|
||||
|
||||
from performance.test_perf_pgbench import PgBenchLoadType, get_durations_matrix, run_test_pgbench
|
||||
|
||||
|
||||
# The following test runs on an existing database that has pgvector extension installed
|
||||
# and a table with 1 million embedding vectors loaded and indexed with HNSW.
|
||||
#
|
||||
# Run this pgbench tests against an existing remote Postgres cluster with the necessary setup.
|
||||
@pytest.mark.parametrize("duration", get_durations_matrix())
|
||||
@pytest.mark.remote_cluster
|
||||
def test_pgbench_remote_pgvector_hnsw(remote_compare: PgCompare, duration: int):
|
||||
run_test_pgbench(remote_compare, 1, duration, PgBenchLoadType.PGVECTOR_HNSW)
|
||||
|
||||
|
||||
# The following test runs on an existing database that has pgvector extension installed
|
||||
# and a table with 1 million embedding vectors loaded and indexed with halfvec.
|
||||
#
|
||||
# Run this pgbench tests against an existing remote Postgres cluster with the necessary setup.
|
||||
@pytest.mark.parametrize("duration", get_durations_matrix())
|
||||
@pytest.mark.remote_cluster
|
||||
def test_pgbench_remote_pgvector_halfvec(remote_compare: PgCompare, duration: int):
|
||||
run_test_pgbench(remote_compare, 1, duration, PgBenchLoadType.PGVECTOR_HALFVEC)
|
||||
@@ -53,6 +53,25 @@ def test_proxy_select_1(static_proxy: NeonProxy):
|
||||
assert out[0][0] == 42
|
||||
|
||||
|
||||
def test_proxy_server_params(static_proxy: NeonProxy):
|
||||
"""
|
||||
Test that server params are passing through to postgres
|
||||
"""
|
||||
|
||||
out = static_proxy.safe_psql(
|
||||
"select to_json('0 seconds'::interval)", options="-c intervalstyle=iso_8601"
|
||||
)
|
||||
assert out[0][0] == "PT0S"
|
||||
out = static_proxy.safe_psql(
|
||||
"select to_json('0 seconds'::interval)", options="-c intervalstyle=sql_standard"
|
||||
)
|
||||
assert out[0][0] == "0"
|
||||
out = static_proxy.safe_psql(
|
||||
"select to_json('0 seconds'::interval)", options="-c intervalstyle=postgres"
|
||||
)
|
||||
assert out[0][0] == "00:00:00"
|
||||
|
||||
|
||||
def test_password_hack(static_proxy: NeonProxy):
|
||||
"""
|
||||
Check the PasswordHack auth flow: an alternative to SCRAM auth for
|
||||
|
||||
@@ -934,19 +934,27 @@ class Failure:
|
||||
def clear(self, env: NeonEnv):
|
||||
raise NotImplementedError()
|
||||
|
||||
def nodes(self):
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
class NodeStop(Failure):
|
||||
def __init__(self, pageserver_id, immediate):
|
||||
self.pageserver_id = pageserver_id
|
||||
def __init__(self, pageserver_ids, immediate):
|
||||
self.pageserver_ids = pageserver_ids
|
||||
self.immediate = immediate
|
||||
|
||||
def apply(self, env: NeonEnv):
|
||||
pageserver = env.get_pageserver(self.pageserver_id)
|
||||
pageserver.stop(immediate=self.immediate)
|
||||
for ps_id in self.pageserver_ids:
|
||||
pageserver = env.get_pageserver(ps_id)
|
||||
pageserver.stop(immediate=self.immediate)
|
||||
|
||||
def clear(self, env: NeonEnv):
|
||||
pageserver = env.get_pageserver(self.pageserver_id)
|
||||
pageserver.start()
|
||||
for ps_id in self.pageserver_ids:
|
||||
pageserver = env.get_pageserver(ps_id)
|
||||
pageserver.start()
|
||||
|
||||
def nodes(self):
|
||||
return self.pageserver_ids
|
||||
|
||||
|
||||
class PageserverFailpoint(Failure):
|
||||
@@ -962,6 +970,9 @@ class PageserverFailpoint(Failure):
|
||||
pageserver = env.get_pageserver(self.pageserver_id)
|
||||
pageserver.http_client().configure_failpoints((self.failpoint, "off"))
|
||||
|
||||
def nodes(self):
|
||||
return [self.pageserver_id]
|
||||
|
||||
|
||||
def build_node_to_tenants_map(env: NeonEnv) -> dict[int, list[TenantId]]:
|
||||
tenants = env.storage_controller.tenant_list()
|
||||
@@ -985,8 +996,9 @@ def build_node_to_tenants_map(env: NeonEnv) -> dict[int, list[TenantId]]:
|
||||
@pytest.mark.parametrize(
|
||||
"failure",
|
||||
[
|
||||
NodeStop(pageserver_id=1, immediate=False),
|
||||
NodeStop(pageserver_id=1, immediate=True),
|
||||
NodeStop(pageserver_ids=[1], immediate=False),
|
||||
NodeStop(pageserver_ids=[1], immediate=True),
|
||||
NodeStop(pageserver_ids=[1, 2], immediate=True),
|
||||
PageserverFailpoint(pageserver_id=1, failpoint="get-utilization-http-handler"),
|
||||
],
|
||||
)
|
||||
@@ -1039,33 +1051,50 @@ def test_storage_controller_heartbeats(
|
||||
wait_until(10, 1, tenants_placed)
|
||||
|
||||
# ... then we apply the failure
|
||||
offline_node_id = failure.pageserver_id
|
||||
online_node_id = (set(range(1, len(env.pageservers) + 1)) - {offline_node_id}).pop()
|
||||
env.get_pageserver(offline_node_id).allowed_errors.append(
|
||||
# In the case of the failpoint failure, the impacted pageserver
|
||||
# still believes it has the tenant attached since location
|
||||
# config calls into it will fail due to being marked offline.
|
||||
".*Dropped remote consistent LSN updates.*",
|
||||
)
|
||||
offline_node_ids = set(failure.nodes())
|
||||
online_node_ids = set(range(1, len(env.pageservers) + 1)) - offline_node_ids
|
||||
|
||||
for node_id in offline_node_ids:
|
||||
env.get_pageserver(node_id).allowed_errors.append(
|
||||
# In the case of the failpoint failure, the impacted pageserver
|
||||
# still believes it has the tenant attached since location
|
||||
# config calls into it will fail due to being marked offline.
|
||||
".*Dropped remote consistent LSN updates.*",
|
||||
)
|
||||
|
||||
if len(offline_node_ids) > 1:
|
||||
env.get_pageserver(node_id).allowed_errors.append(
|
||||
".*Scheduling error when marking pageserver.*offline.*",
|
||||
)
|
||||
|
||||
failure.apply(env)
|
||||
|
||||
# ... expecting the heartbeats to mark it offline
|
||||
def node_offline():
|
||||
def nodes_offline():
|
||||
nodes = env.storage_controller.node_list()
|
||||
log.info(f"{nodes=}")
|
||||
target = next(n for n in nodes if n["id"] == offline_node_id)
|
||||
assert target["availability"] == "Offline"
|
||||
for node in nodes:
|
||||
if node["id"] in offline_node_ids:
|
||||
assert node["availability"] == "Offline"
|
||||
|
||||
# A node is considered offline if the last successful heartbeat
|
||||
# was more than 10 seconds ago (hardcoded in the storage controller).
|
||||
wait_until(20, 1, node_offline)
|
||||
wait_until(20, 1, nodes_offline)
|
||||
|
||||
# .. expecting the tenant on the offline node to be migrated
|
||||
def tenant_migrated():
|
||||
if len(online_node_ids) == 0:
|
||||
time.sleep(5)
|
||||
return
|
||||
|
||||
node_to_tenants = build_node_to_tenants_map(env)
|
||||
log.info(f"{node_to_tenants=}")
|
||||
assert set(node_to_tenants[online_node_id]) == set(tenant_ids)
|
||||
|
||||
observed_tenants = set()
|
||||
for node_id in online_node_ids:
|
||||
observed_tenants |= set(node_to_tenants[node_id])
|
||||
|
||||
assert observed_tenants == set(tenant_ids)
|
||||
|
||||
wait_until(10, 1, tenant_migrated)
|
||||
|
||||
@@ -1073,31 +1102,24 @@ def test_storage_controller_heartbeats(
|
||||
failure.clear(env)
|
||||
|
||||
# ... expecting the offline node to become active again
|
||||
def node_online():
|
||||
def nodes_online():
|
||||
nodes = env.storage_controller.node_list()
|
||||
target = next(n for n in nodes if n["id"] == offline_node_id)
|
||||
assert target["availability"] == "Active"
|
||||
for node in nodes:
|
||||
if node["id"] in online_node_ids:
|
||||
assert node["availability"] == "Active"
|
||||
|
||||
wait_until(10, 1, node_online)
|
||||
wait_until(10, 1, nodes_online)
|
||||
|
||||
time.sleep(5)
|
||||
|
||||
# ... then we create a new tenant
|
||||
tid = TenantId.generate()
|
||||
env.storage_controller.tenant_create(tid)
|
||||
|
||||
# ... expecting it to be placed on the node that just came back online
|
||||
tenants = env.storage_controller.tenant_list()
|
||||
newest_tenant = next(t for t in tenants if t["tenant_shard_id"] == str(tid))
|
||||
locations = list(newest_tenant["observed"]["locations"].keys())
|
||||
locations = [int(node_id) for node_id in locations]
|
||||
assert locations == [offline_node_id]
|
||||
node_to_tenants = build_node_to_tenants_map(env)
|
||||
log.info(f"Back online: {node_to_tenants=}")
|
||||
|
||||
# ... expecting the storage controller to reach a consistent state
|
||||
def storage_controller_consistent():
|
||||
env.storage_controller.consistency_check()
|
||||
|
||||
wait_until(10, 1, storage_controller_consistent)
|
||||
wait_until(30, 1, storage_controller_consistent)
|
||||
|
||||
|
||||
def test_storage_controller_re_attach(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
Reference in New Issue
Block a user