Mirror of https://github.com/neondatabase/neon.git (synced 2026-02-07 04:30:36 +00:00)

Compare commits: ephemerals...conrad/pro (16 commits)
| SHA1 |
|---|
| 631139ceeb |
| fd43058bd7 |
| cf07c5b5f9 |
| 11bb84c38d |
| 219c72c24c |
| 0633cd6385 |
| 0cdb0c5704 |
| eefac5d78b |
| 7d1c908b1b |
| cfa2813446 |
| 034bdb1552 |
| 8b1ffa1718 |
| 2d3ea77953 |
| 3124729f53 |
| 6463eb38be |
| ae506fd791 |
Cargo.lock (generated, 7 lines changed)
@@ -4236,7 +4236,6 @@ name = "pagebench"
version = "0.1.0"
dependencies = [
 "anyhow",
 "async-trait",
 "camino",
 "clap",
 "futures",
@@ -4245,15 +4244,12 @@ dependencies = [
 "humantime-serde",
 "pageserver_api",
 "pageserver_client",
 "pageserver_page_api",
 "rand 0.8.5",
 "reqwest",
 "serde",
 "serde_json",
 "tokio",
 "tokio-stream",
 "tokio-util",
 "tonic 0.13.1",
 "tracing",
 "utils",
 "workspace_hack",
@@ -4309,7 +4305,6 @@ dependencies = [
 "hashlink",
 "hex",
 "hex-literal",
 "http 1.1.0",
 "http-utils",
 "humantime",
 "humantime-serde",
@@ -4372,7 +4367,6 @@ dependencies = [
 "toml_edit",
 "tonic 0.13.1",
 "tonic-reflection",
 "tower 0.5.2",
 "tracing",
 "tracing-utils",
 "twox-hash",
@@ -4469,6 +4463,7 @@ dependencies = [
 "pageserver_api",
 "postgres_ffi",
 "prost 0.13.5",
 "smallvec",
 "thiserror 1.0.69",
 "tonic 0.13.1",
 "tonic-build",
@@ -310,13 +310,13 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
    . "$HOME/.cargo/env" && \
    cargo --version && rustup --version && \
    rustup component add llvm-tools rustfmt clippy && \
    cargo install rustfilt --version ${RUSTFILT_VERSION} --locked && \
    cargo install cargo-hakari --version ${CARGO_HAKARI_VERSION} --locked && \
    cargo install cargo-deny --version ${CARGO_DENY_VERSION} --locked && \
    cargo install cargo-hack --version ${CARGO_HACK_VERSION} --locked && \
    cargo install cargo-nextest --version ${CARGO_NEXTEST_VERSION} --locked && \
    cargo install cargo-chef --version ${CARGO_CHEF_VERSION} --locked && \
    cargo install diesel_cli --version ${CARGO_DIESEL_CLI_VERSION} --locked \
    cargo install rustfilt --version ${RUSTFILT_VERSION} && \
    cargo install cargo-hakari --version ${CARGO_HAKARI_VERSION} && \
    cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
    cargo install cargo-hack --version ${CARGO_HACK_VERSION} && \
    cargo install cargo-nextest --version ${CARGO_NEXTEST_VERSION} && \
    cargo install cargo-chef --locked --version ${CARGO_CHEF_VERSION} && \
    cargo install diesel_cli --version ${CARGO_DIESEL_CLI_VERSION} \
        --features postgres-bundled --no-default-features && \
    rm -rf /home/nonroot/.cargo/registry && \
    rm -rf /home/nonroot/.cargo/git

@@ -1180,14 +1180,14 @@ RUN cd exts/rag && \
RUN cd exts/rag_bge_small_en_v15 && \
    sed -i 's/pgrx = "0.14.1"/pgrx = { version = "0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
    ORT_LIB_LOCATION=/ext-src/onnxruntime-src/build/Linux \
    REMOTE_ONNX_URL=http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local/pgrag-data/bge_small_en_v15.onnx \
    REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/bge_small_en_v15.onnx \
    cargo pgrx install --release --features remote_onnx && \
    echo "trusted = true" >> /usr/local/pgsql/share/extension/rag_bge_small_en_v15.control

RUN cd exts/rag_jina_reranker_v1_tiny_en && \
    sed -i 's/pgrx = "0.14.1"/pgrx = { version = "0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
    ORT_LIB_LOCATION=/ext-src/onnxruntime-src/build/Linux \
    REMOTE_ONNX_URL=http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local/pgrag-data/jina_reranker_v1_tiny_en.onnx \
    REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/jina_reranker_v1_tiny_en.onnx \
    cargo pgrx install --release --features remote_onnx && \
    echo "trusted = true" >> /usr/local/pgsql/share/extension/rag_jina_reranker_v1_tiny_en.control
@@ -57,6 +57,21 @@ use tracing::{error, info};
use url::Url;
use utils::failpoint_support;

// Compatibility hack: if the control plane specified any remote-ext-config,
// use the default value for the extension storage proxy gateway.
// Remove this once the control plane is updated to pass the gateway URL.
fn parse_remote_ext_base_url(arg: &str) -> Result<String> {
    const FALLBACK_PG_EXT_GATEWAY_BASE_URL: &str =
        "http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local";

    Ok(if arg.starts_with("http") {
        arg
    } else {
        FALLBACK_PG_EXT_GATEWAY_BASE_URL
    }
    .to_owned())
}

#[derive(Parser)]
#[command(rename_all = "kebab-case")]
struct Cli {
@@ -64,8 +79,9 @@ struct Cli {
    pub pgbin: String,

    /// The base URL for the remote extension storage proxy gateway.
    #[arg(short = 'r', long)]
    pub remote_ext_base_url: Option<Url>,
    /// Should be in the form of `http(s)://<gateway-hostname>[:<port>]`.
    #[arg(short = 'r', long, value_parser = parse_remote_ext_base_url, alias = "remote-ext-config")]
    pub remote_ext_base_url: Option<String>,

    /// The port to bind the external listening HTTP server to. Clients running
    /// outside the compute will talk to the compute through this port. Keep
@@ -260,4 +276,18 @@ mod test {
    fn verify_cli() {
        Cli::command().debug_assert()
    }

    #[test]
    fn parse_pg_ext_gateway_base_url() {
        let arg = "http://pg-ext-s3-gateway2";
        let result = super::parse_remote_ext_base_url(arg).unwrap();
        assert_eq!(result, arg);

        let arg = "pg-ext-s3-gateway";
        let result = super::parse_remote_ext_base_url(arg).unwrap();
        assert_eq!(
            result,
            "http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local"
        );
    }
}
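The hunk above keeps old `--remote-ext-config` invocations working while new deployments pass a full URL: anything not starting with `http` maps to the in-cluster fallback gateway. As a rough, hedged illustration of how clap wires such a parser and alias together (the `Sketch` struct and `main` below are invented for demonstration and are not the real `Cli`):

```rust
use clap::Parser;

// Minimal sketch: accept either `--remote-ext-base-url` or the legacy alias
// `--remote-ext-config`, routing both through the fallback parser.
#[derive(Parser)]
#[command(rename_all = "kebab-case")]
struct Sketch {
    #[arg(short = 'r', long, value_parser = parse_remote_ext_base_url, alias = "remote-ext-config")]
    remote_ext_base_url: Option<String>,
}

fn parse_remote_ext_base_url(arg: &str) -> Result<String, String> {
    // Anything that does not look like a URL is treated as a legacy flag value
    // and mapped to the in-cluster gateway address.
    Ok(if arg.starts_with("http") {
        arg
    } else {
        "http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local"
    }
    .to_owned())
}

fn main() {
    // A legacy `--remote-ext-config` value resolves to the fallback gateway URL.
    let cli = Sketch::parse_from(["sketch", "--remote-ext-config", "legacy-value"]);
    assert_eq!(
        cli.remote_ext_base_url.as_deref(),
        Some("http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local")
    );
}
```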
@@ -339,8 +339,6 @@ async fn run_dump_restore(
    destination_connstring: String,
) -> Result<(), anyhow::Error> {
    let dumpdir = workdir.join("dumpdir");
    let num_jobs = num_cpus::get().to_string();
    info!("using {num_jobs} jobs for dump/restore");

    let common_args = [
        // schema mapping (prob suffices to specify them on one side)
@@ -356,7 +354,7 @@ async fn run_dump_restore(
        "directory".to_string(),
        // concurrency
        "--jobs".to_string(),
        num_jobs,
        num_cpus::get().to_string(),
        // progress updates
        "--verbose".to_string(),
    ];
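The hunk inlines the job count, passing one dump/restore job per CPU straight into the argument list. A hedged, standalone sketch of what that argument shape drives at the process level (the connstring and dump path here are invented examples):

```rust
use std::process::Command;

fn main() -> std::io::Result<()> {
    // One pg_dump job per CPU, mirroring the `--jobs` argument built above.
    let jobs = num_cpus::get().to_string();
    let status = Command::new("pg_dump")
        .args(["--format", "directory", "--jobs", &jobs, "--verbose"])
        .args(["--file", "/tmp/dumpdir", "postgres://localhost/source_db"])
        .status()?;
    assert!(status.success());
    Ok(())
}
```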
@@ -31,7 +31,6 @@ use std::time::{Duration, Instant};
use std::{env, fs};
use tokio::spawn;
use tracing::{Instrument, debug, error, info, instrument, warn};
use url::Url;
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
use utils::measured_stream::MeasuredReader;
@@ -97,7 +96,7 @@ pub struct ComputeNodeParams {
    pub internal_http_port: u16,

    /// The address of the extension storage proxy gateway.
    pub remote_ext_base_url: Option<Url>,
    pub remote_ext_base_url: Option<String>,

    /// Interval for installed extensions collection
    pub installed_extensions_collection_interval: u64,
@@ -83,7 +83,6 @@ use reqwest::StatusCode;
use tar::Archive;
use tracing::info;
use tracing::log::warn;
use url::Url;
use zstd::stream::read::Decoder;

use crate::metrics::{REMOTE_EXT_REQUESTS_TOTAL, UNKNOWN_HTTP_STATUS};
@@ -159,14 +158,14 @@ fn parse_pg_version(human_version: &str) -> PostgresMajorVersion {
pub async fn download_extension(
    ext_name: &str,
    ext_path: &RemotePath,
    remote_ext_base_url: &Url,
    remote_ext_base_url: &str,
    pgbin: &str,
) -> Result<u64> {
    info!("Download extension {:?} from {:?}", ext_name, ext_path);

    // TODO add retry logic
    let download_buffer =
        match download_extension_tar(remote_ext_base_url.as_str(), &ext_path.to_string()).await {
        match download_extension_tar(remote_ext_base_url, &ext_path.to_string()).await {
            Ok(buffer) => buffer,
            Err(error_message) => {
                return Err(anyhow::anyhow!(
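The `// TODO add retry logic` above remains open in this hunk. A minimal sketch of what bounded retry around the download could look like; the helper name `download_extension_tar` is taken from the diff, while the backoff policy and the stub body are assumptions:

```rust
use std::time::Duration;

// Hypothetical bounded-retry wrapper; the real code has no retry yet (see TODO above).
async fn download_with_retry(
    base_url: &str,
    ext_path: &str,
    max_attempts: u32,
) -> anyhow::Result<Vec<u8>> {
    let mut delay = Duration::from_millis(100);
    for attempt in 1..=max_attempts {
        match download_extension_tar(base_url, ext_path).await {
            Ok(buf) => return Ok(buf),
            Err(e) if attempt < max_attempts => {
                tracing::warn!("download attempt {attempt} failed: {e}, retrying");
                tokio::time::sleep(delay).await;
                delay *= 2; // exponential backoff
            }
            Err(e) => return Err(anyhow::anyhow!("download failed after {attempt} attempts: {e}")),
        }
    }
    Err(anyhow::anyhow!("max_attempts must be at least 1"))
}

// Stub standing in for the function in the diff, so the sketch type-checks on its own.
async fn download_extension_tar(_base_url: &str, _ext_path: &str) -> anyhow::Result<Vec<u8>> {
    Ok(Vec::new())
}
```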
@@ -107,7 +107,7 @@ impl<const N: usize> MetricType for HyperLogLogState<N> {
}

impl<const N: usize> HyperLogLogState<N> {
    pub fn measure(&self, item: &(impl Hash + ?Sized)) {
    pub fn measure(&self, item: &impl Hash) {
        // changing the hasher will break compatibility with previous measurements.
        self.record(BuildHasherDefault::<xxh3::Hash64>::default().hash_one(item));
    }
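The difference between the two signatures is the `?Sized` bound: with it, callers can pass unsized hashable values such as `str` or `[u8]` directly; without it, the implicit `Sized` bound requires a sized reference. A standalone sketch (not the metrics crate API) of the distinction:

```rust
use std::hash::Hash;

// With `?Sized`, `item` may be an unsized type such as `str` or `[u8]`.
fn measure_unsized(item: &(impl Hash + ?Sized)) {
    let _ = item;
}

// Without it, the implicit `Sized` bound rejects `str` itself.
fn measure_sized(item: &impl Hash) {
    let _ = item;
}

fn main() {
    let s: &str = "hello";
    measure_unsized(s); // ok: `str: Hash`
    measure_sized(&s); // needs an extra reference: `&str` is `Sized + Hash`
}
```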
@@ -27,7 +27,6 @@ pub use prometheus::{

pub mod launch_timestamp;
mod wrappers;
pub use prometheus;
pub use wrappers::{CountedReader, CountedWriter};
mod hll;
pub use hll::{HyperLogLog, HyperLogLogState, HyperLogLogVec};
@@ -181,7 +181,6 @@ pub struct ConfigToml {
    pub virtual_file_io_engine: Option<crate::models::virtual_file::IoEngineKind>,
    pub ingest_batch_size: u64,
    pub max_vectored_read_bytes: MaxVectoredReadBytes,
    pub max_get_vectored_keys: MaxGetVectoredKeys,
    pub image_compression: ImageCompressionAlgorithm,
    pub timeline_offloading: bool,
    pub ephemeral_bytes_per_memory_kb: usize,
@@ -230,7 +229,7 @@ pub enum PageServicePipeliningConfig {
}
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct PageServicePipeliningConfigPipelined {
    /// Fails config parsing and validation if larger than `max_get_vectored_keys`.
    /// Causes runtime errors if larger than the max get_vectored batch size.
    pub max_batch_size: NonZeroUsize,
    pub execution: PageServiceProtocolPipelinedExecutionStrategy,
    // The default below is such that new versions of the software can start
@@ -404,16 +403,6 @@ impl Default for EvictionOrder {
#[serde(transparent)]
pub struct MaxVectoredReadBytes(pub NonZeroUsize);

#[derive(Copy, Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(transparent)]
pub struct MaxGetVectoredKeys(NonZeroUsize);

impl MaxGetVectoredKeys {
    pub fn get(&self) -> usize {
        self.0.get()
    }
}

/// Tenant-level configuration values, used for various purposes.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(default)]
@@ -598,8 +587,6 @@ pub mod defaults {
    /// That is, slightly above 128 kB.
    pub const DEFAULT_MAX_VECTORED_READ_BYTES: usize = 130 * 1024; // 130 KiB

    pub const DEFAULT_MAX_GET_VECTORED_KEYS: usize = 32;

    pub const DEFAULT_IMAGE_COMPRESSION: ImageCompressionAlgorithm =
        ImageCompressionAlgorithm::Zstd { level: Some(1) };

@@ -608,10 +595,7 @@ pub mod defaults {
    pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512;

    pub const DEFAULT_WAL_RECEIVER_PROTOCOL: utils::postgres_client::PostgresClientProtocol =
        utils::postgres_client::PostgresClientProtocol::Interpreted {
            format: utils::postgres_client::InterpretedFormat::Protobuf,
            compression: Some(utils::postgres_client::Compression::Zstd { level: 1 }),
        };
        utils::postgres_client::PostgresClientProtocol::Vanilla;

    pub const DEFAULT_SSL_KEY_FILE: &str = "server.key";
    pub const DEFAULT_SSL_CERT_FILE: &str = "server.crt";
@@ -701,9 +685,6 @@ impl Default for ConfigToml {
            max_vectored_read_bytes: (MaxVectoredReadBytes(
                NonZeroUsize::new(DEFAULT_MAX_VECTORED_READ_BYTES).unwrap(),
            )),
            max_get_vectored_keys: (MaxGetVectoredKeys(
                NonZeroUsize::new(DEFAULT_MAX_GET_VECTORED_KEYS).unwrap(),
            )),
            image_compression: (DEFAULT_IMAGE_COMPRESSION),
            timeline_offloading: true,
            ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
@@ -732,9 +713,9 @@ impl Default for ConfigToml {
            enable_tls_page_service_api: false,
            dev_mode: false,
            timeline_import_config: TimelineImportConfig {
                import_job_concurrency: NonZeroUsize::new(32).unwrap(),
                import_job_soft_size_limit: NonZeroUsize::new(256 * 1024 * 1024).unwrap(),
                import_job_checkpoint_threshold: NonZeroUsize::new(32).unwrap(),
                import_job_concurrency: NonZeroUsize::new(128).unwrap(),
                import_job_soft_size_limit: NonZeroUsize::new(1024 * 1024 * 1024).unwrap(),
                import_job_checkpoint_threshold: NonZeroUsize::new(128).unwrap(),
            },
            basebackup_cache_config: None,
            posthog_config: None,
@@ -1934,7 +1934,7 @@ pub enum PagestreamFeMessage {
}

// Wrapped in libpq CopyData
#[derive(Debug, strum_macros::EnumProperty)]
#[derive(strum_macros::EnumProperty)]
pub enum PagestreamBeMessage {
    Exists(PagestreamExistsResponse),
    Nblocks(PagestreamNblocksResponse),
@@ -2045,7 +2045,7 @@ pub enum PagestreamProtocolVersion {

pub type RequestId = u64;

#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct PagestreamRequest {
    pub reqid: RequestId,
    pub request_lsn: Lsn,
@@ -2064,7 +2064,7 @@ pub struct PagestreamNblocksRequest {
    pub rel: RelTag,
}

#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct PagestreamGetPageRequest {
    pub hdr: PagestreamRequest,
    pub rel: RelTag,
@@ -24,7 +24,7 @@ use serde::{Deserialize, Serialize};
// FIXME: should move 'forknum' as last field to keep this consistent with Postgres.
// Then we could replace the custom Ord and PartialOrd implementations below with
// deriving them. This will require changes in walredoproc.c.
#[derive(Debug, Default, PartialEq, Eq, Hash, Clone, Copy, Serialize, Deserialize)]
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Serialize, Deserialize)]
pub struct RelTag {
    pub forknum: u8,
    pub spcnode: Oid,
@@ -184,12 +184,12 @@ pub enum SlruKind {
    MultiXactOffsets,
}

impl fmt::Display for SlruKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
impl SlruKind {
    pub fn to_str(&self) -> &'static str {
        match self {
            Self::Clog => write!(f, "pg_xact"),
            Self::MultiXactMembers => write!(f, "pg_multixact/members"),
            Self::MultiXactOffsets => write!(f, "pg_multixact/offsets"),
            Self::Clog => "pg_xact",
            Self::MultiXactMembers => "pg_multixact/members",
            Self::MultiXactOffsets => "pg_multixact/offsets",
        }
    }
}
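Swapping the `Display` impl for a `to_str` that returns `&'static str` trades the `fmt` machinery for a plain static lookup; call sites (such as the basebackup tar-name formatting later in this compare) switch from `{kind}` to `kind.to_str()`. A standalone sketch of the resulting shape, using simplified types rather than the pageserver's own:

```rust
#[allow(dead_code)]
enum SlruKind {
    Clog,
    MultiXactMembers,
    MultiXactOffsets,
}

impl SlruKind {
    // Returning `&'static str` lets callers use the name without going through
    // `fmt::Display`, and it still composes with `format!`.
    fn to_str(&self) -> &'static str {
        match self {
            Self::Clog => "pg_xact",
            Self::MultiXactMembers => "pg_multixact/members",
            Self::MultiXactOffsets => "pg_multixact/offsets",
        }
    }
}

fn main() {
    let kind = SlruKind::Clog;
    let segname = format!("{}/{:>04X}", kind.to_str(), 0x2A);
    assert_eq!(segname, "pg_xact/002A");
}
```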
@@ -4,7 +4,6 @@ use std::{sync::Arc, time::Duration};

use arc_swap::ArcSwap;
use tokio_util::sync::CancellationToken;
use tracing::{Instrument, info_span};

use crate::{FeatureStore, PostHogClient, PostHogClientConfig};

@@ -27,35 +26,31 @@ impl FeatureResolverBackgroundLoop {
    pub fn spawn(self: Arc<Self>, handle: &tokio::runtime::Handle, refresh_period: Duration) {
        let this = self.clone();
        let cancel = self.cancel.clone();
        handle.spawn(
            async move {
                tracing::info!("Starting PostHog feature resolver");
                let mut ticker = tokio::time::interval(refresh_period);
                ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
                loop {
                    tokio::select! {
                        _ = ticker.tick() => {}
                        _ = cancel.cancelled() => break
                    }
                    let resp = match this
                        .posthog_client
                        .get_feature_flags_local_evaluation()
                        .await
                    {
                        Ok(resp) => resp,
                        Err(e) => {
                            tracing::warn!("Cannot get feature flags: {}", e);
                            continue;
                        }
                    };
                    let feature_store = FeatureStore::new_with_flags(resp.flags);
                    this.feature_store.store(Arc::new(feature_store));
                    tracing::info!("Feature flag updated");
        handle.spawn(async move {
            tracing::info!("Starting PostHog feature resolver");
            let mut ticker = tokio::time::interval(refresh_period);
            ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
            loop {
                tokio::select! {
                    _ = ticker.tick() => {}
                    _ = cancel.cancelled() => break
                }
                tracing::info!("PostHog feature resolver stopped");
                let resp = match this
                    .posthog_client
                    .get_feature_flags_local_evaluation()
                    .await
                {
                    Ok(resp) => resp,
                    Err(e) => {
                        tracing::warn!("Cannot get feature flags: {}", e);
                        continue;
                    }
                };
                let feature_store = FeatureStore::new_with_flags(resp.flags);
                this.feature_store.store(Arc::new(feature_store));
            }
            .instrument(info_span!("posthog_feature_resolver")),
        );
            tracing::info!("PostHog feature resolver stopped");
        });
    }

    pub fn feature_store(&self) -> Arc<FeatureStore> {
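Both versions of the spawn body share the same loop shape: tick on a fixed period, exit promptly on cancellation, and skip missed ticks instead of bursting. A self-contained sketch of that pattern, with the fetch-and-swap body elided:

```rust
use std::time::Duration;
use tokio_util::sync::CancellationToken;

// Generic refresh-loop shape: periodic work with prompt, cooperative shutdown.
async fn refresh_loop(cancel: CancellationToken, period: Duration) {
    let mut ticker = tokio::time::interval(period);
    ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
    loop {
        tokio::select! {
            _ = ticker.tick() => {}
            _ = cancel.cancelled() => break,
        }
        // Fetch-and-swap would go here; errors are logged and the loop continues.
    }
}

#[tokio::main]
async fn main() {
    let cancel = CancellationToken::new();
    let handle = tokio::spawn(refresh_loop(cancel.clone(), Duration::from_secs(60)));
    cancel.cancel(); // shut the loop down
    handle.await.unwrap();
}
```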
@@ -448,18 +448,6 @@ impl FeatureStore {
            )))
        }
    }

    /// Infer whether a feature flag is a boolean flag by checking if it has a multivariate filter.
    pub fn is_feature_flag_boolean(&self, flag_key: &str) -> Result<bool, PostHogEvaluationError> {
        if let Some(flag_config) = self.flags.get(flag_key) {
            Ok(flag_config.filters.multivariate.is_none())
        } else {
            Err(PostHogEvaluationError::NotAvailable(format!(
                "Not found in the local evaluation spec: {}",
                flag_key
            )))
        }
    }
}

pub struct PostHogClientConfig {
@@ -540,15 +528,7 @@ impl PostHogClient {
            .bearer_auth(&self.config.server_api_key)
            .send()
            .await?;
        let status = response.status();
        let body = response.text().await?;
        if !status.is_success() {
            return Err(anyhow::anyhow!(
                "Failed to get feature flags: {}, {}",
                status,
                body
            ));
        }
        Ok(serde_json::from_str(&body)?)
    }
@@ -28,7 +28,6 @@ use std::time::Duration;
use tokio::sync::Notify;
use tokio::time::Instant;

#[derive(Clone, Copy)]
pub struct LeakyBucketConfig {
    /// This is the "time cost" of a single request unit.
    /// Should loosely represent how long it takes to handle a request unit in active resource time.
@@ -73,7 +73,6 @@ pub mod error;
/// async timeout helper
pub mod timeout;

pub mod span;
pub mod sync;

pub mod failpoint_support;
@@ -1,19 +0,0 @@
//! Tracing span helpers.

/// Records the given fields in the current span, as a single call. The fields must already have
/// been declared for the span (typically with empty values).
#[macro_export]
macro_rules! span_record {
    ($($tokens:tt)*) => {$crate::span_record_in!(::tracing::Span::current(), $($tokens)*)};
}

/// Records the given fields in the given span, as a single call. The fields must already have been
/// declared for the span (typically with empty values).
#[macro_export]
macro_rules! span_record_in {
    ($span:expr, $($tokens:tt)*) => {
        if let Some(meta) = $span.metadata() {
            $span.record_all(&tracing::valueset!(meta.fields(), $($tokens)*));
        }
    };
}
@@ -34,7 +34,6 @@ fail.workspace = true
futures.workspace = true
hashlink.workspace = true
hex.workspace = true
http.workspace = true
http-utils.workspace = true
humantime-serde.workspace = true
humantime.workspace = true
@@ -94,7 +93,6 @@ tokio-util.workspace = true
toml_edit = { workspace = true, features = [ "serde" ] }
tonic.workspace = true
tonic-reflection.workspace = true
tower.workspace = true
tracing.workspace = true
tracing-utils.workspace = true
url.workspace = true
@@ -264,56 +264,10 @@ mod propagation_of_cached_label_value {
    }
}

criterion_group!(histograms, histograms::bench_bucket_scalability);
mod histograms {
    use std::time::Instant;

    use criterion::{BenchmarkId, Criterion};
    use metrics::core::Collector;

    pub fn bench_bucket_scalability(c: &mut Criterion) {
        let mut g = c.benchmark_group("bucket_scalability");

        for n in [1, 4, 8, 16, 32, 64, 128, 256] {
            g.bench_with_input(BenchmarkId::new("nbuckets", n), &n, |b, n| {
                b.iter_custom(|iters| {
                    let buckets: Vec<f64> = (0..*n).map(|i| i as f64 * 100.0).collect();
                    let histo = metrics::Histogram::with_opts(
                        metrics::prometheus::HistogramOpts::new("name", "help")
                            .buckets(buckets.clone()),
                    )
                    .unwrap();
                    let start = Instant::now();
                    for i in 0..usize::try_from(iters).unwrap() {
                        histo.observe(buckets[i % buckets.len()]);
                    }
                    let elapsed = start.elapsed();
                    // self-test
                    let mfs = histo.collect();
                    assert_eq!(mfs.len(), 1);
                    let metrics = mfs[0].get_metric();
                    assert_eq!(metrics.len(), 1);
                    let histo = metrics[0].get_histogram();
                    let buckets = histo.get_bucket();
                    assert!(
                        buckets
                            .iter()
                            .enumerate()
                            .all(|(i, b)| b.get_cumulative_count()
                                >= i as u64 * (iters / buckets.len() as u64))
                    );
                    elapsed
                })
            });
        }
    }
}

criterion_main!(
    label_values,
    single_metric_multicore_scalability,
    propagation_of_cached_label_value,
    histograms,
    propagation_of_cached_label_value
);

/*
@@ -336,14 +290,6 @@ propagation_of_cached_label_value__naive/nthreads/8 time: [211.50 ns 214.44 ns
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1 time: [14.135 ns 14.147 ns 14.160 ns]
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4 time: [14.243 ns 14.255 ns 14.268 ns]
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8 time: [14.470 ns 14.682 ns 14.895 ns]
bucket_scalability/nbuckets/1 time: [30.352 ns 30.353 ns 30.354 ns]
bucket_scalability/nbuckets/4 time: [30.464 ns 30.465 ns 30.467 ns]
bucket_scalability/nbuckets/8 time: [30.569 ns 30.575 ns 30.584 ns]
bucket_scalability/nbuckets/16 time: [30.961 ns 30.965 ns 30.969 ns]
bucket_scalability/nbuckets/32 time: [35.691 ns 35.707 ns 35.722 ns]
bucket_scalability/nbuckets/64 time: [47.829 ns 47.898 ns 47.974 ns]
bucket_scalability/nbuckets/128 time: [73.479 ns 73.512 ns 73.545 ns]
bucket_scalability/nbuckets/256 time: [127.92 ns 127.94 ns 127.96 ns]

Results on an i3en.3xlarge instance

@@ -398,14 +344,6 @@ propagation_of_cached_label_value__naive/nthreads/8 time: [434.87 ns 456.4
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1 time: [3.3767 ns 3.3974 ns 3.4220 ns]
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4 time: [3.6105 ns 4.2355 ns 5.1463 ns]
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8 time: [4.0889 ns 4.9714 ns 6.0779 ns]
bucket_scalability/nbuckets/1 time: [4.8455 ns 4.8542 ns 4.8646 ns]
bucket_scalability/nbuckets/4 time: [4.5663 ns 4.5722 ns 4.5787 ns]
bucket_scalability/nbuckets/8 time: [4.5531 ns 4.5670 ns 4.5842 ns]
bucket_scalability/nbuckets/16 time: [4.6392 ns 4.6524 ns 4.6685 ns]
bucket_scalability/nbuckets/32 time: [6.0302 ns 6.0439 ns 6.0589 ns]
bucket_scalability/nbuckets/64 time: [10.608 ns 10.644 ns 10.691 ns]
bucket_scalability/nbuckets/128 time: [22.178 ns 22.316 ns 22.483 ns]
bucket_scalability/nbuckets/256 time: [42.190 ns 42.328 ns 42.492 ns]

Results on a Hetzner AX102 AMD Ryzen 9 7950X3D 16-Core Processor

@@ -424,13 +362,5 @@ propagation_of_cached_label_value__naive/nthreads/8 time: [164.24 ns 170.1
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1 time: [2.2915 ns 2.2960 ns 2.3012 ns]
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4 time: [2.5726 ns 2.6158 ns 2.6624 ns]
propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8 time: [2.7068 ns 2.8243 ns 2.9824 ns]
bucket_scalability/nbuckets/1 time: [6.3998 ns 6.4288 ns 6.4684 ns]
bucket_scalability/nbuckets/4 time: [6.3603 ns 6.3620 ns 6.3637 ns]
bucket_scalability/nbuckets/8 time: [6.1646 ns 6.1654 ns 6.1667 ns]
bucket_scalability/nbuckets/16 time: [6.1341 ns 6.1391 ns 6.1454 ns]
bucket_scalability/nbuckets/32 time: [8.2206 ns 8.2254 ns 8.2301 ns]
bucket_scalability/nbuckets/64 time: [13.988 ns 13.994 ns 14.000 ns]
bucket_scalability/nbuckets/128 time: [28.180 ns 28.216 ns 28.251 ns]
bucket_scalability/nbuckets/256 time: [54.914 ns 54.931 ns 54.951 ns]

*/
@@ -9,6 +9,7 @@ bytes.workspace = true
pageserver_api.workspace = true
postgres_ffi.workspace = true
prost.workspace = true
smallvec.workspace = true
thiserror.workspace = true
tonic.workspace = true
utils.workspace = true
@@ -9,16 +9,10 @@
//! - Use more precise datatypes, e.g. Lsn and uints shorter than 32 bits.
//!
//! - Validate protocol invariants, via try_from() and try_into().
//!
//! Validation only happens on the receiver side, i.e. when converting from Protobuf to domain
//! types. This is where it matters -- the Protobuf types are less strict than the domain types, and
//! receivers should expect all sorts of junk from senders. This also allows the sender to use e.g.
//! stream combinators without dealing with errors, and avoids validating the same message twice.

use std::fmt::Display;

use bytes::Bytes;
use postgres_ffi::Oid;
use smallvec::SmallVec;
// TODO: split out Lsn, RelTag, SlruKind, Oid and other basic types to a separate crate, to avoid
// pulling in all of their other crate dependencies when building the client.
use utils::lsn::Lsn;
@@ -54,8 +48,7 @@ pub struct ReadLsn {
    pub request_lsn: Lsn,
    /// If given, the caller guarantees that the page has not been modified since this LSN. Must be
    /// smaller than or equal to request_lsn. This allows the Pageserver to serve an old page
    /// without waiting for the request LSN to arrive. If not given, the request will read at the
    /// request_lsn and wait for it to arrive if necessary. Valid for all request types.
    /// without waiting for the request LSN to arrive. Valid for all request types.
    ///
    /// It is undefined behaviour to make a request such that the page was, in fact, modified
    /// between request_lsn and not_modified_since_lsn. The Pageserver might detect it and return an
@@ -65,14 +58,19 @@ pub struct ReadLsn {
    pub not_modified_since_lsn: Option<Lsn>,
}

impl Display for ReadLsn {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let req_lsn = self.request_lsn;
        if let Some(mod_lsn) = self.not_modified_since_lsn {
            write!(f, "{req_lsn}>={mod_lsn}")
        } else {
            req_lsn.fmt(f)
impl ReadLsn {
    /// Validates the ReadLsn.
    pub fn validate(&self) -> Result<(), ProtocolError> {
        if self.request_lsn == Lsn::INVALID {
            return Err(ProtocolError::invalid("request_lsn", self.request_lsn));
        }
        if self.not_modified_since_lsn > Some(self.request_lsn) {
            return Err(ProtocolError::invalid(
                "not_modified_since_lsn",
                self.not_modified_since_lsn,
            ));
        }
        Ok(())
    }
}

@@ -80,31 +78,27 @@ impl TryFrom<proto::ReadLsn> for ReadLsn {
    type Error = ProtocolError;

    fn try_from(pb: proto::ReadLsn) -> Result<Self, Self::Error> {
        if pb.request_lsn == 0 {
            return Err(ProtocolError::invalid("request_lsn", pb.request_lsn));
        }
        if pb.not_modified_since_lsn > pb.request_lsn {
            return Err(ProtocolError::invalid(
                "not_modified_since_lsn",
                pb.not_modified_since_lsn,
            ));
        }
        Ok(Self {
        let read_lsn = Self {
            request_lsn: Lsn(pb.request_lsn),
            not_modified_since_lsn: match pb.not_modified_since_lsn {
                0 => None,
                lsn => Some(Lsn(lsn)),
            },
        })
        };
        read_lsn.validate()?;
        Ok(read_lsn)
    }
}

impl From<ReadLsn> for proto::ReadLsn {
    fn from(read_lsn: ReadLsn) -> Self {
        Self {
impl TryFrom<ReadLsn> for proto::ReadLsn {
    type Error = ProtocolError;

    fn try_from(read_lsn: ReadLsn) -> Result<Self, Self::Error> {
        read_lsn.validate()?;
        Ok(Self {
            request_lsn: read_lsn.request_lsn.0,
            not_modified_since_lsn: read_lsn.not_modified_since_lsn.unwrap_or_default().0,
        }
        })
    }
}
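The refactor above centralizes the invariants in `ReadLsn::validate` and calls it from both conversion directions, so senders now reject invalid LSNs too instead of validation living only on the receive path. A standalone sketch of the shared-validator pattern, with `Wire` standing in for the generated Protobuf type and a simplified error:

```rust
#[derive(Debug)]
struct ParseError(&'static str);

struct Domain {
    request_lsn: u64,
    not_modified_since_lsn: Option<u64>,
}

struct Wire {
    request_lsn: u64,
    not_modified_since_lsn: u64, // 0 encodes "not given"
}

impl Domain {
    // One place owns the invariants; both conversion directions call it.
    fn validate(&self) -> Result<(), ParseError> {
        if self.request_lsn == 0 {
            return Err(ParseError("request_lsn"));
        }
        if self.not_modified_since_lsn > Some(self.request_lsn) {
            return Err(ParseError("not_modified_since_lsn"));
        }
        Ok(())
    }
}

impl TryFrom<Wire> for Domain {
    type Error = ParseError;
    fn try_from(pb: Wire) -> Result<Self, Self::Error> {
        let v = Domain {
            request_lsn: pb.request_lsn,
            not_modified_since_lsn: match pb.not_modified_since_lsn {
                0 => None,
                lsn => Some(lsn),
            },
        };
        v.validate()?;
        Ok(v)
    }
}

impl TryFrom<Domain> for Wire {
    type Error = ParseError;
    fn try_from(v: Domain) -> Result<Self, Self::Error> {
        v.validate()?;
        Ok(Wire {
            request_lsn: v.request_lsn,
            not_modified_since_lsn: v.not_modified_since_lsn.unwrap_or_default(),
        })
    }
}

fn main() {
    let bad = Wire { request_lsn: 5, not_modified_since_lsn: 9 };
    assert!(Domain::try_from(bad).is_err()); // 9 > 5 violates the invariant
}
```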
@@ -159,15 +153,6 @@ impl TryFrom<proto::CheckRelExistsRequest> for CheckRelExistsRequest {
    }
}

impl From<CheckRelExistsRequest> for proto::CheckRelExistsRequest {
    fn from(request: CheckRelExistsRequest) -> Self {
        Self {
            read_lsn: Some(request.read_lsn.into()),
            rel: Some(request.rel.into()),
        }
    }
}

pub type CheckRelExistsResponse = bool;

impl From<proto::CheckRelExistsResponse> for CheckRelExistsResponse {
@@ -205,12 +190,14 @@ impl TryFrom<proto::GetBaseBackupRequest> for GetBaseBackupRequest {
    }
}

impl From<GetBaseBackupRequest> for proto::GetBaseBackupRequest {
    fn from(request: GetBaseBackupRequest) -> Self {
        Self {
            read_lsn: Some(request.read_lsn.into()),
impl TryFrom<GetBaseBackupRequest> for proto::GetBaseBackupRequest {
    type Error = ProtocolError;

    fn try_from(request: GetBaseBackupRequest) -> Result<Self, Self::Error> {
        Ok(Self {
            read_lsn: Some(request.read_lsn.try_into()?),
            replica: request.replica,
        }
        })
    }
}

@@ -227,9 +214,14 @@ impl TryFrom<proto::GetBaseBackupResponseChunk> for GetBaseBackupResponseChunk {
    }
}

impl From<GetBaseBackupResponseChunk> for proto::GetBaseBackupResponseChunk {
    fn from(chunk: GetBaseBackupResponseChunk) -> Self {
        Self { chunk }
impl TryFrom<GetBaseBackupResponseChunk> for proto::GetBaseBackupResponseChunk {
    type Error = ProtocolError;

    fn try_from(chunk: GetBaseBackupResponseChunk) -> Result<Self, Self::Error> {
        if chunk.is_empty() {
            return Err(ProtocolError::Missing("chunk"));
        }
        Ok(Self { chunk })
    }
}

@@ -254,12 +246,14 @@ impl TryFrom<proto::GetDbSizeRequest> for GetDbSizeRequest {
    }
}

impl From<GetDbSizeRequest> for proto::GetDbSizeRequest {
    fn from(request: GetDbSizeRequest) -> Self {
        Self {
            read_lsn: Some(request.read_lsn.into()),
impl TryFrom<GetDbSizeRequest> for proto::GetDbSizeRequest {
    type Error = ProtocolError;

    fn try_from(request: GetDbSizeRequest) -> Result<Self, Self::Error> {
        Ok(Self {
            read_lsn: Some(request.read_lsn.try_into()?),
            db_oid: request.db_oid,
        }
        })
    }
}

@@ -294,7 +288,7 @@ pub struct GetPageRequest {
    /// Multiple pages will be executed as a single batch by the Pageserver, amortizing layer access
    /// costs and parallelizing them. This may increase the latency of any individual request, but
    /// improves the overall latency and throughput of the batch as a whole.
    pub block_numbers: Vec<u32>,
    pub block_numbers: SmallVec<[u32; 1]>,
}

impl TryFrom<proto::GetPageRequest> for GetPageRequest {
@@ -312,20 +306,25 @@ impl TryFrom<proto::GetPageRequest> for GetPageRequest {
                .ok_or(ProtocolError::Missing("read_lsn"))?
                .try_into()?,
            rel: pb.rel.ok_or(ProtocolError::Missing("rel"))?.try_into()?,
            block_numbers: pb.block_number,
            block_numbers: pb.block_number.into(),
        })
    }
}

impl From<GetPageRequest> for proto::GetPageRequest {
    fn from(request: GetPageRequest) -> Self {
        Self {
impl TryFrom<GetPageRequest> for proto::GetPageRequest {
    type Error = ProtocolError;

    fn try_from(request: GetPageRequest) -> Result<Self, Self::Error> {
        if request.block_numbers.is_empty() {
            return Err(ProtocolError::Missing("block_number"));
        }
        Ok(Self {
            request_id: request.request_id,
            request_class: request.request_class.into(),
            read_lsn: Some(request.read_lsn.into()),
            read_lsn: Some(request.read_lsn.try_into()?),
            rel: Some(request.rel.into()),
            block_number: request.block_numbers,
        }
            block_number: request.block_numbers.into_vec(),
        })
    }
}
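The switch from `Vec<u32>` to `SmallVec<[u32; 1]>` reflects that the overwhelmingly common GetPage request carries a single block number, which now lives inline with no heap allocation. A short standalone demonstration of the behavior and of the conversions the hunk relies on:

```rust
use smallvec::{smallvec, SmallVec};

fn main() {
    // One element stays inline; the common single-page request allocates nothing.
    let one: SmallVec<[u32; 1]> = smallvec![42];
    assert!(!one.spilled());

    // Larger batches transparently spill to the heap, Vec-style.
    let many: SmallVec<[u32; 1]> = (0..8).collect();
    assert!(many.spilled());

    // The conversions used in the hunk above: Vec -> SmallVec and back.
    let from_wire: SmallVec<[u32; 1]> = vec![1, 2, 3].into();
    let to_wire: Vec<u32> = from_wire.into_vec();
    assert_eq!(to_wire, vec![1, 2, 3]);
}
```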
@@ -397,7 +396,7 @@ pub struct GetPageResponse {
    /// A string describing the status, if any.
    pub reason: Option<String>,
    /// The 8KB page images, in the same order as the request. Empty if status != OK.
    pub page_images: Vec<Bytes>,
    pub page_images: SmallVec<[Bytes; 1]>,
}

impl From<proto::GetPageResponse> for GetPageResponse {
@@ -406,7 +405,7 @@ impl From<proto::GetPageResponse> for GetPageResponse {
            request_id: pb.request_id,
            status_code: pb.status_code.into(),
            reason: Some(pb.reason).filter(|r| !r.is_empty()),
            page_images: pb.page_image,
            page_images: pb.page_image.into(),
        }
    }
}
@@ -417,7 +416,7 @@ impl From<GetPageResponse> for proto::GetPageResponse {
            request_id: response.request_id,
            status_code: response.status_code.into(),
            reason: response.reason.unwrap_or_default(),
            page_image: response.page_images,
            page_image: response.page_images.into_vec(),
        }
    }
}
@@ -506,12 +505,14 @@ impl TryFrom<proto::GetRelSizeRequest> for GetRelSizeRequest {
    }
}

impl From<GetRelSizeRequest> for proto::GetRelSizeRequest {
    fn from(request: GetRelSizeRequest) -> Self {
        Self {
            read_lsn: Some(request.read_lsn.into()),
impl TryFrom<GetRelSizeRequest> for proto::GetRelSizeRequest {
    type Error = ProtocolError;

    fn try_from(request: GetRelSizeRequest) -> Result<Self, Self::Error> {
        Ok(Self {
            read_lsn: Some(request.read_lsn.try_into()?),
            rel: Some(request.rel.into()),
        }
        })
    }
}

@@ -554,13 +555,15 @@ impl TryFrom<proto::GetSlruSegmentRequest> for GetSlruSegmentRequest {
    }
}

impl From<GetSlruSegmentRequest> for proto::GetSlruSegmentRequest {
    fn from(request: GetSlruSegmentRequest) -> Self {
        Self {
            read_lsn: Some(request.read_lsn.into()),
impl TryFrom<GetSlruSegmentRequest> for proto::GetSlruSegmentRequest {
    type Error = ProtocolError;

    fn try_from(request: GetSlruSegmentRequest) -> Result<Self, Self::Error> {
        Ok(Self {
            read_lsn: Some(request.read_lsn.try_into()?),
            kind: request.kind as u32,
            segno: request.segno,
        }
        })
    }
}

@@ -577,9 +580,14 @@ impl TryFrom<proto::GetSlruSegmentResponse> for GetSlruSegmentResponse {
    }
}

impl From<GetSlruSegmentResponse> for proto::GetSlruSegmentResponse {
    fn from(segment: GetSlruSegmentResponse) -> Self {
        Self { segment }
impl TryFrom<GetSlruSegmentResponse> for proto::GetSlruSegmentResponse {
    type Error = ProtocolError;

    fn try_from(segment: GetSlruSegmentResponse) -> Result<Self, Self::Error> {
        if segment.is_empty() {
            return Err(ProtocolError::Missing("segment"));
        }
        Ok(Self { segment })
    }
}
@@ -8,7 +8,6 @@ license.workspace = true

[dependencies]
anyhow.workspace = true
async-trait.workspace = true
camino.workspace = true
clap.workspace = true
futures.workspace = true
@@ -16,17 +15,14 @@ hdrhistogram.workspace = true
humantime.workspace = true
humantime-serde.workspace = true
rand.workspace = true
reqwest.workspace = true
reqwest.workspace=true
serde.workspace = true
serde_json.workspace = true
tracing.workspace = true
tokio.workspace = true
tokio-stream.workspace = true
tokio-util.workspace = true
tonic.workspace = true

pageserver_client.workspace = true
pageserver_api.workspace = true
pageserver_page_api.workspace = true
utils = { path = "../../libs/utils/" }
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
@@ -7,15 +7,11 @@ use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};

use anyhow::Context;
use async_trait::async_trait;
use camino::Utf8PathBuf;
use pageserver_api::key::Key;
use pageserver_api::keyspace::KeySpaceAccum;
use pageserver_api::models::{
    PagestreamGetPageRequest, PagestreamGetPageResponse, PagestreamRequest,
};
use pageserver_api::models::{PagestreamGetPageRequest, PagestreamRequest};
use pageserver_api::shard::TenantShardId;
use pageserver_page_api::proto;
use rand::prelude::*;
use tokio::task::JoinSet;
use tokio_util::sync::CancellationToken;
@@ -26,12 +22,6 @@ use utils::lsn::Lsn;
use crate::util::tokio_thread_local_stats::AllThreadLocalStats;
use crate::util::{request_stats, tokio_thread_local_stats};

#[derive(clap::ValueEnum, Clone, Debug)]
enum Protocol {
    Libpq,
    Grpc,
}

/// GetPage@LatestLSN, uniformly distributed across the compute-accessible keyspace.
#[derive(clap::Parser)]
pub(crate) struct Args {
@@ -45,8 +35,6 @@ pub(crate) struct Args {
    num_clients: NonZeroUsize,
    #[clap(long)]
    runtime: Option<humantime::Duration>,
    #[clap(long, value_enum, default_value = "libpq")]
    protocol: Protocol,
    /// Each client sends requests at the given rate.
    ///
    /// If a request takes too long and we should be issuing a new request already,
@@ -315,20 +303,7 @@ async fn main_impl(
        .unwrap();

    Box::pin(async move {
        let client: Box<dyn Client> = match args.protocol {
            Protocol::Libpq => Box::new(
                LibpqClient::new(args.page_service_connstring.clone(), worker_id.timeline)
                    .await
                    .unwrap(),
            ),

            Protocol::Grpc => Box::new(
                GrpcClient::new(args.page_service_connstring.clone(), worker_id.timeline)
                    .await
                    .unwrap(),
            ),
        };
        run_worker(args, client, ss, cancel, rps_period, ranges, weights).await
        client_libpq(args, worker_id, ss, cancel, rps_period, ranges, weights).await
    })
};

@@ -380,15 +355,23 @@ async fn main_impl(
    anyhow::Ok(())
}

async fn run_worker(
async fn client_libpq(
    args: &Args,
    mut client: Box<dyn Client>,
    worker_id: WorkerId,
    shared_state: Arc<SharedState>,
    cancel: CancellationToken,
    rps_period: Option<Duration>,
    ranges: Vec<KeyRange>,
    weights: rand::distributions::weighted::WeightedIndex<i128>,
) {
    let client = pageserver_client::page_service::Client::new(args.page_service_connstring.clone())
        .await
        .unwrap();
    let mut client = client
        .pagestream(worker_id.timeline.tenant_id, worker_id.timeline.timeline_id)
        .await
        .unwrap();

    shared_state.start_work_barrier.wait().await;
    let client_start = Instant::now();
    let mut ticks_processed = 0;
@@ -432,12 +415,12 @@ async fn run_worker(
            blkno: block_no,
        }
    };
    client.send_get_page(req).await.unwrap();
    client.getpage_send(req).await.unwrap();
    inflight.push_back(start);
}

let start = inflight.pop_front().unwrap();
client.recv_get_page().await.unwrap();
client.getpage_recv().await.unwrap();
let end = Instant::now();
shared_state.live_stats.request_done();
ticks_processed += 1;
@@ -459,104 +442,3 @@ async fn run_worker(
        }
    }
}

/// A benchmark client, to allow switching out the transport protocol.
///
/// For simplicity, this just uses separate asynchronous send/recv methods. The send method could
/// return a future that resolves when the response is received, but we don't really need it.
#[async_trait]
trait Client: Send {
    /// Sends an asynchronous GetPage request to the pageserver.
    async fn send_get_page(&mut self, req: PagestreamGetPageRequest) -> anyhow::Result<()>;

    /// Receives the next GetPage response from the pageserver.
    async fn recv_get_page(&mut self) -> anyhow::Result<PagestreamGetPageResponse>;
}

/// A libpq-based Pageserver client.
struct LibpqClient {
    inner: pageserver_client::page_service::PagestreamClient,
}

impl LibpqClient {
    async fn new(connstring: String, ttid: TenantTimelineId) -> anyhow::Result<Self> {
        let inner = pageserver_client::page_service::Client::new(connstring)
            .await?
            .pagestream(ttid.tenant_id, ttid.timeline_id)
            .await?;
        Ok(Self { inner })
    }
}

#[async_trait]
impl Client for LibpqClient {
    async fn send_get_page(&mut self, req: PagestreamGetPageRequest) -> anyhow::Result<()> {
        self.inner.getpage_send(req).await
    }

    async fn recv_get_page(&mut self) -> anyhow::Result<PagestreamGetPageResponse> {
        self.inner.getpage_recv().await
    }
}

/// A gRPC client using the raw, no-frills gRPC client.
struct GrpcClient {
    req_tx: tokio::sync::mpsc::Sender<proto::GetPageRequest>,
    resp_rx: tonic::Streaming<proto::GetPageResponse>,
}

impl GrpcClient {
    async fn new(connstring: String, ttid: TenantTimelineId) -> anyhow::Result<Self> {
        let mut client = pageserver_page_api::proto::PageServiceClient::connect(connstring).await?;

        // The channel has a buffer size of 1, since 0 is not allowed. It does not matter, since the
        // benchmark will control the queue depth (i.e. in-flight requests) anyway, and requests are
        // buffered by Tonic and the OS too.
        let (req_tx, req_rx) = tokio::sync::mpsc::channel(1);
        let req_stream = tokio_stream::wrappers::ReceiverStream::new(req_rx);
        let mut req = tonic::Request::new(req_stream);
        let metadata = req.metadata_mut();
        metadata.insert("neon-tenant-id", ttid.tenant_id.to_string().try_into()?);
        metadata.insert("neon-timeline-id", ttid.timeline_id.to_string().try_into()?);
        metadata.insert("neon-shard-id", "0000".try_into()?);

        let resp = client.get_pages(req).await?;
        let resp_stream = resp.into_inner();

        Ok(Self {
            req_tx,
            resp_rx: resp_stream,
        })
    }
}

#[async_trait]
impl Client for GrpcClient {
    async fn send_get_page(&mut self, req: PagestreamGetPageRequest) -> anyhow::Result<()> {
        let req = proto::GetPageRequest {
            request_id: 0,
            request_class: proto::GetPageClass::Normal as i32,
            read_lsn: Some(proto::ReadLsn {
                request_lsn: req.hdr.request_lsn.0,
                not_modified_since_lsn: req.hdr.not_modified_since.0,
            }),
            rel: Some(req.rel.into()),
            block_number: vec![req.blkno],
        };
        self.req_tx.send(req).await?;
        Ok(())
    }

    async fn recv_get_page(&mut self) -> anyhow::Result<PagestreamGetPageResponse> {
        let resp = self.resp_rx.message().await?.unwrap();
        anyhow::ensure!(
            resp.status_code == proto::GetPageStatusCode::Ok as i32,
            "unexpected status code: {}",
            resp.status_code
        );
        Ok(PagestreamGetPageResponse {
            page: resp.page_image[0].clone(),
            req: PagestreamGetPageRequest::default(), // dummy
        })
    }
}
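The `Client` trait shown in this hunk let the benchmark swap transports at runtime behind a `Box<dyn Client>`, with one implementation per protocol. A hedged, heavily simplified sketch of just that selection pattern (the trait, the two stub transports, and the page size here are invented for illustration):

```rust
use async_trait::async_trait;

// One trait, two transports, chosen at runtime behind a trait object.
#[async_trait]
trait Transport: Send {
    async fn send(&mut self, blkno: u32) -> anyhow::Result<()>;
    async fn recv(&mut self) -> anyhow::Result<Vec<u8>>;
}

struct Libpq;
struct Grpc;

#[async_trait]
impl Transport for Libpq {
    async fn send(&mut self, _blkno: u32) -> anyhow::Result<()> { Ok(()) }
    async fn recv(&mut self) -> anyhow::Result<Vec<u8>> { Ok(vec![0; 8192]) }
}

#[async_trait]
impl Transport for Grpc {
    async fn send(&mut self, _blkno: u32) -> anyhow::Result<()> { Ok(()) }
    async fn recv(&mut self) -> anyhow::Result<Vec<u8>> { Ok(vec![0; 8192]) }
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let use_grpc = false;
    let mut client: Box<dyn Transport> =
        if use_grpc { Box::new(Grpc) } else { Box::new(Libpq) };
    client.send(1).await?;
    let page = client.recv().await?;
    assert_eq!(page.len(), 8192);
    Ok(())
}
```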
@@ -65,30 +65,6 @@ impl From<GetVectoredError> for BasebackupError {
    }
}

impl From<BasebackupError> for postgres_backend::QueryError {
    fn from(err: BasebackupError) -> Self {
        use postgres_backend::QueryError;
        use pq_proto::framed::ConnectionError;
        match err {
            BasebackupError::Client(err, _) => QueryError::Disconnected(ConnectionError::Io(err)),
            BasebackupError::Server(err) => QueryError::Other(err),
            BasebackupError::Shutdown => QueryError::Shutdown,
        }
    }
}

impl From<BasebackupError> for tonic::Status {
    fn from(err: BasebackupError) -> Self {
        use tonic::Code;
        let code = match &err {
            BasebackupError::Client(_, _) => Code::Cancelled,
            BasebackupError::Server(_) => Code::Internal,
            BasebackupError::Shutdown => Code::Unavailable,
        };
        tonic::Status::new(code, err.to_string())
    }
}
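The two `From` impls removed in this hunk mapped one domain error onto both the libpq and gRPC surfaces, classifying each variant once. As a generic illustration of that error-to-status mapping idiom (the `BackupError` type below is a simplified stand-in, not the pageserver's):

```rust
// Sketch: classify a domain error into a tonic status code once, centrally.
#[derive(Debug, thiserror::Error)]
enum BackupError {
    #[error("client disconnected: {0}")]
    Client(String),
    #[error("server error: {0}")]
    Server(String),
    #[error("shutting down")]
    Shutdown,
}

impl From<BackupError> for tonic::Status {
    fn from(err: BackupError) -> Self {
        let code = match &err {
            BackupError::Client(_) => tonic::Code::Cancelled,
            BackupError::Server(_) => tonic::Code::Internal,
            BackupError::Shutdown => tonic::Code::Unavailable,
        };
        tonic::Status::new(code, err.to_string())
    }
}

fn main() {
    let status = tonic::Status::from(BackupError::Shutdown);
    assert_eq!(status.code(), tonic::Code::Unavailable);
}
```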
/// Create basebackup with non-rel data in it.
/// Only include relational data if 'full_backup' is true.
///
@@ -272,7 +248,7 @@ where
    async fn flush(&mut self) -> Result<(), BasebackupError> {
        let nblocks = self.buf.len() / BLCKSZ as usize;
        let (kind, segno) = self.current_segment.take().unwrap();
        let segname = format!("{kind}/{segno:>04X}");
        let segname = format!("{}/{:>04X}", kind.to_str(), segno);
        let header = new_tar_header(&segname, self.buf.len() as u64)?;
        self.ar
            .append(&header, self.buf.as_slice())
@@ -371,7 +347,7 @@ where
        .await?
        .partition(
            self.timeline.get_shard_identity(),
            self.timeline.conf.max_get_vectored_keys.get() as u64 * BLCKSZ as u64,
            Timeline::MAX_GET_VECTORED_KEYS * BLCKSZ as u64,
        );

    let mut slru_builder = SlruSegmentsBuilder::new(&mut self.ar);
@@ -804,7 +804,7 @@ fn start_pageserver(
        } else {
            None
        },
        basebackup_cache,
        basebackup_cache.clone(),
    );

    // Spawn a Pageserver gRPC server task. It will spawn separate tasks for
@@ -816,10 +816,12 @@ fn start_pageserver(
    let mut page_service_grpc = None;
    if let Some(grpc_listener) = grpc_listener {
        page_service_grpc = Some(page_service::spawn_grpc(
            conf,
            tenant_manager.clone(),
            grpc_auth,
            otel_guard.as_ref().map(|g| g.dispatch.clone()),
            grpc_listener,
            basebackup_cache,
        )?);
    }
@@ -14,10 +14,7 @@ use std::time::Duration;
use anyhow::{Context, bail, ensure};
use camino::{Utf8Path, Utf8PathBuf};
use once_cell::sync::OnceCell;
use pageserver_api::config::{
    DiskUsageEvictionTaskConfig, MaxGetVectoredKeys, MaxVectoredReadBytes,
    PageServicePipeliningConfig, PageServicePipeliningConfigPipelined, PostHogConfig,
};
use pageserver_api::config::{DiskUsageEvictionTaskConfig, MaxVectoredReadBytes, PostHogConfig};
use pageserver_api::models::ImageCompressionAlgorithm;
use pageserver_api::shard::TenantShardId;
use pem::Pem;
@@ -188,9 +185,6 @@ pub struct PageServerConf {

    pub max_vectored_read_bytes: MaxVectoredReadBytes,

    /// Maximum number of keys to be read in a single get_vectored call.
    pub max_get_vectored_keys: MaxGetVectoredKeys,

    pub image_compression: ImageCompressionAlgorithm,

    /// Whether to offload archived timelines automatically
@@ -410,7 +404,6 @@ impl PageServerConf {
            secondary_download_concurrency,
            ingest_batch_size,
            max_vectored_read_bytes,
            max_get_vectored_keys,
            image_compression,
            timeline_offloading,
            ephemeral_bytes_per_memory_kb,
@@ -477,7 +470,6 @@ impl PageServerConf {
            secondary_download_concurrency,
            ingest_batch_size,
            max_vectored_read_bytes,
            max_get_vectored_keys,
            image_compression,
            timeline_offloading,
            ephemeral_bytes_per_memory_kb,
@@ -606,19 +598,6 @@ impl PageServerConf {
                )
            })?;

        if let PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {
            max_batch_size,
            ..
        }) = conf.page_service_pipelining
        {
            if max_batch_size.get() > conf.max_get_vectored_keys.get() {
                return Err(anyhow::anyhow!(
                    "`max_batch_size` ({max_batch_size}) must be less than or equal to `max_get_vectored_keys` ({})",
                    conf.max_get_vectored_keys.get()
                ));
            }
        };

        Ok(conf)
    }

@@ -706,7 +685,6 @@ impl ConfigurableSemaphore {
mod tests {

    use camino::Utf8PathBuf;
    use rstest::rstest;
    use utils::id::NodeId;

    use super::PageServerConf;
@@ -746,28 +724,4 @@ mod tests {
        PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir)
            .expect_err("parse_and_validate should fail for endpoint without scheme");
    }

    #[rstest]
    #[case(32, 32, true)]
    #[case(64, 32, false)]
    #[case(64, 64, true)]
    #[case(128, 128, true)]
    fn test_config_max_batch_size_is_valid(
        #[case] max_batch_size: usize,
        #[case] max_get_vectored_keys: usize,
        #[case] is_valid: bool,
    ) {
        let input = format!(
            r#"
            control_plane_api = "http://localhost:6666"
            max_get_vectored_keys = {max_get_vectored_keys}
            page_service_pipelining = {{ mode="pipelined", execution="concurrent-futures", max_batch_size={max_batch_size}, batching="uniform-lsn" }}
            "#,
        );
        let config_toml = toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(&input)
            .expect("config has valid fields");
        let workdir = Utf8PathBuf::from("/nonexistent");
        let result = PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir);
        assert_eq!(result.is_ok(), is_valid);
    }
}
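The validation and the rstest cases above encode a single invariant: a pipelined batch may never exceed the vectored-get key limit. A minimal sketch of that check in isolation, using the same case table as the test:

```rust
// Sketch of the invariant enforced at parse time: max_batch_size <= max_get_vectored_keys.
fn validate(max_batch_size: usize, max_get_vectored_keys: usize) -> Result<(), String> {
    if max_batch_size > max_get_vectored_keys {
        return Err(format!(
            "`max_batch_size` ({max_batch_size}) must be less than or equal to \
             `max_get_vectored_keys` ({max_get_vectored_keys})"
        ));
    }
    Ok(())
}

fn main() {
    // Mirrors the rstest cases above.
    assert!(validate(32, 32).is_ok());
    assert!(validate(64, 32).is_err());
    assert!(validate(64, 64).is_ok());
    assert!(validate(128, 128).is_ok());
}
```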
@@ -91,14 +91,4 @@ impl FeatureResolver {
            ))
        }
    }

    pub fn is_feature_flag_boolean(&self, flag_key: &str) -> Result<bool, PostHogEvaluationError> {
        if let Some(inner) = &self.inner {
            inner.feature_store().is_feature_flag_boolean(flag_key)
        } else {
            Err(PostHogEvaluationError::NotAvailable(
                "PostHog integration is not enabled".to_string(),
            ))
        }
    }
}
@@ -3663,46 +3663,6 @@ async fn read_tar_eof(mut reader: (impl tokio::io::AsyncRead + Unpin)) -> anyhow
    Ok(())
}

async fn tenant_evaluate_feature_flag(
    request: Request<Body>,
    _cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
    check_permission(&request, Some(tenant_shard_id.tenant_id))?;

    let flag: String = must_parse_query_param(&request, "flag")?;
    let as_type: String = must_parse_query_param(&request, "as")?;

    let state = get_state(&request);

    async {
        let tenant = state
            .tenant_manager
            .get_attached_tenant_shard(tenant_shard_id)?;
        if as_type == "boolean" {
            let result = tenant.feature_resolver.evaluate_boolean(&flag, tenant_shard_id.tenant_id);
            let result = result.map(|_| true).map_err(|e| e.to_string());
            json_response(StatusCode::OK, result)
        } else if as_type == "multivariate" {
            let result = tenant.feature_resolver.evaluate_multivariate(&flag, tenant_shard_id.tenant_id).map_err(|e| e.to_string());
            json_response(StatusCode::OK, result)
        } else {
            // Auto infer the type of the feature flag.
            let is_boolean = tenant.feature_resolver.is_feature_flag_boolean(&flag).map_err(|e| ApiError::InternalServerError(anyhow::anyhow!("{e}")))?;
            if is_boolean {
                let result = tenant.feature_resolver.evaluate_boolean(&flag, tenant_shard_id.tenant_id);
                let result = result.map(|_| true).map_err(|e| e.to_string());
                json_response(StatusCode::OK, result)
            } else {
                let result = tenant.feature_resolver.evaluate_multivariate(&flag, tenant_shard_id.tenant_id).map_err(|e| e.to_string());
                json_response(StatusCode::OK, result)
            }
        }
    }
    .instrument(info_span!("tenant_evaluate_feature_flag", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug()))
    .await
}

/// Common functionality of all the HTTP API handlers.
///
/// - Adds a tracing span to each request (by `request_span`)
@@ -4079,8 +4039,5 @@ pub fn make_router(
        "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/activate_post_import",
        |r| api_handler(r, activate_post_import_handler),
    )
    .get("/v1/tenant/:tenant_shard_id/feature_flag", |r| {
        api_handler(r, tenant_evaluate_feature_flag)
    })
    .any(handler_404))
}
@@ -15,7 +15,6 @@ use metrics::{
|
||||
register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
|
||||
};
|
||||
use once_cell::sync::Lazy;
|
||||
use pageserver_api::config::defaults::DEFAULT_MAX_GET_VECTORED_KEYS;
|
||||
use pageserver_api::config::{
|
||||
PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
|
||||
PageServiceProtocolPipelinedBatchingStrategy, PageServiceProtocolPipelinedExecutionStrategy,
|
||||
@@ -33,6 +32,7 @@ use crate::config::PageServerConf;
|
||||
use crate::context::{PageContentKind, RequestContext};
|
||||
use crate::pgdatadir_mapping::DatadirModificationStats;
|
||||
use crate::task_mgr::TaskKind;
|
||||
use crate::tenant::Timeline;
|
||||
use crate::tenant::layer_map::LayerMap;
|
||||
use crate::tenant::mgr::TenantSlot;
|
||||
use crate::tenant::storage_layer::{InMemoryLayer, PersistentLayerDesc};
|
||||
@@ -1312,44 +1312,11 @@ impl EvictionsWithLowResidenceDuration {
//
// Roughly logarithmic scale.
const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
    0.00005,  // 50us
    0.00006,  // 60us
    0.00007,  // 70us
    0.00008,  // 80us
    0.00009,  // 90us
    0.0001,   // 100us
    0.000110, // 110us
    0.000120, // 120us
    0.000130, // 130us
    0.000140, // 140us
    0.000150, // 150us
    0.000160, // 160us
    0.000170, // 170us
    0.000180, // 180us
    0.000190, // 190us
    0.000200, // 200us
    0.000210, // 210us
    0.000220, // 220us
    0.000230, // 230us
    0.000240, // 240us
    0.000250, // 250us
    0.000300, // 300us
    0.000350, // 350us
    0.000400, // 400us
    0.000450, // 450us
    0.000500, // 500us
    0.000600, // 600us
    0.000700, // 700us
    0.000800, // 800us
    0.000900, // 900us
    0.001000, // 1ms
    0.002000, // 2ms
    0.003000, // 3ms
    0.004000, // 4ms
    0.005000, // 5ms
    0.01000,  // 10ms
    0.02000,  // 20ms
    0.05000,  // 50ms
    0.000030, // 30 usec
    0.001000, // 1000 usec
    0.030,    // 30 ms
    1.000,    // 1000 ms
    30.000,   // 30000 ms
];

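The new ladder replaces the five coarse buckets with a finer, roughly logarithmic progression. A sketch of how the same ladder could be generated programmatically — the segment boundaries below are read off the constant above, this generator is not code from the diff:

fn storage_io_time_buckets() -> Vec<f64> {
    let mut buckets = Vec::new();
    // (start_us, end_us, step_us) segments matching the constant above.
    let segments = [
        (50u64, 240, 10),      // 10us steps through ~250us
        (250, 450, 50),        // 50us steps to 500us
        (500, 900, 100),       // 100us steps to 1ms
        (1_000, 5_000, 1_000), // 1ms steps to 5ms
    ];
    for (start, end, step) in segments {
        let mut v = start;
        while v <= end {
            buckets.push(v as f64 / 1_000_000.0);
            v += step;
        }
    }
    buckets.extend([0.01, 0.02, 0.05]); // 10ms, 20ms, 50ms tail
    buckets
}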
/// VirtualFile fs operation variants.
@@ -1939,7 +1906,7 @@ static SMGR_QUERY_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
});

static PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL: Lazy<Vec<f64>> = Lazy::new(|| {
    (1..=u32::try_from(DEFAULT_MAX_GET_VECTORED_KEYS).unwrap())
    (1..=u32::try_from(Timeline::MAX_GET_VECTORED_KEYS).unwrap())
        .map(|v| v.into())
        .collect()
});
@@ -1957,7 +1924,7 @@ static PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE: Lazy<Vec<f64>> = Lazy::new(
    let mut buckets = Vec::new();
    for i in 0.. {
        let bucket = 1 << i;
        if bucket > u32::try_from(DEFAULT_MAX_GET_VECTORED_KEYS).unwrap() {
        if bucket > u32::try_from(Timeline::MAX_GET_VECTORED_KEYS).unwrap() {
            break;
        }
        buckets.push(bucket.into());

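For the batch-size histograms, the per-timeline buckets are simply powers of two up to the maximum batch size. A standalone sketch of the loop above, assuming MAX_GET_VECTORED_KEYS = 32 as introduced elsewhere in this diff:

fn batch_size_buckets(max_keys: u32) -> Vec<f64> {
    let mut buckets = Vec::new();
    for i in 0.. {
        let bucket: u32 = 1 << i;
        if bucket > max_keys {
            break;
        }
        buckets.push(f64::from(bucket));
    }
    buckets // for max_keys = 32: [1, 2, 4, 8, 16, 32]
}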
File diff suppressed because it is too large
@@ -431,10 +431,10 @@ impl Timeline {
            GetVectoredError::InvalidLsn(e) => {
                Err(anyhow::anyhow!("invalid LSN: {e:?}").into())
            }
            // NB: this should never happen in practice because we limit batch size to be smaller than max_get_vectored_keys
            // NB: this should never happen in practice because we limit MAX_GET_VECTORED_KEYS
            // TODO: we can prevent this error class by moving this check into the type system
            GetVectoredError::Oversized(err, max) => {
                Err(anyhow::anyhow!("batching oversized: {err} > {max}").into())
            GetVectoredError::Oversized(err) => {
                Err(anyhow::anyhow!("batching oversized: {err:?}").into())
            }
        };

@@ -471,19 +471,8 @@ impl Timeline {

        let rels = self.list_rels(spcnode, dbnode, version, ctx).await?;

        if rels.is_empty() {
            return Ok(0);
        }

        // Pre-deserialize the rel directory to avoid duplicated work in `get_relsize_cached`.
        let reldir_key = rel_dir_to_key(spcnode, dbnode);
        let buf = version.get(self, reldir_key, ctx).await?;
        let reldir = RelDirectory::des(&buf)?;

        for rel in rels {
            let n_blocks = self
                .get_rel_size_in_reldir(rel, version, Some((reldir_key, &reldir)), ctx)
                .await?;
            let n_blocks = self.get_rel_size(rel, version, ctx).await?;
            total_blocks += n_blocks as usize;
        }
        Ok(total_blocks)
@@ -498,19 +487,6 @@ impl Timeline {
        tag: RelTag,
        version: Version<'_>,
        ctx: &RequestContext,
    ) -> Result<BlockNumber, PageReconstructError> {
        self.get_rel_size_in_reldir(tag, version, None, ctx).await
    }

    /// Get size of a relation file. The relation must exist, otherwise an error is returned.
    ///
    /// See [`Self::get_rel_exists_in_reldir`] on why we need `deserialized_reldir_v1`.
    pub(crate) async fn get_rel_size_in_reldir(
        &self,
        tag: RelTag,
        version: Version<'_>,
        deserialized_reldir_v1: Option<(Key, &RelDirectory)>,
        ctx: &RequestContext,
    ) -> Result<BlockNumber, PageReconstructError> {
        if tag.relnode == 0 {
            return Err(PageReconstructError::Other(
@@ -523,9 +499,7 @@ impl Timeline {
        }

        if (tag.forknum == FSM_FORKNUM || tag.forknum == VISIBILITYMAP_FORKNUM)
            && !self
                .get_rel_exists_in_reldir(tag, version, deserialized_reldir_v1, ctx)
                .await?
            && !self.get_rel_exists(tag, version, ctx).await?
        {
            // FIXME: Postgres sometimes calls smgrcreate() to create
            // FSM, and smgrnblocks() on it immediately afterwards,
@@ -547,28 +521,11 @@ impl Timeline {
    ///
    /// Only shard 0 has a full view of the relations. Other shards only know about relations that
    /// the shard stores pages for.
    ///
    pub(crate) async fn get_rel_exists(
        &self,
        tag: RelTag,
        version: Version<'_>,
        ctx: &RequestContext,
    ) -> Result<bool, PageReconstructError> {
        self.get_rel_exists_in_reldir(tag, version, None, ctx).await
    }

    /// Does the relation exist? With a cached deserialized `RelDirectory`.
    ///
    /// There are some cases where the caller loops across all relations. In that specific case,
    /// the caller should obtain the deserialized `RelDirectory` first and then call this function
    /// to avoid duplicated work of deserialization. This is a hack and should be removed by introducing
    /// a new API (e.g., `get_rel_exists_batched`).
    pub(crate) async fn get_rel_exists_in_reldir(
        &self,
        tag: RelTag,
        version: Version<'_>,
        deserialized_reldir_v1: Option<(Key, &RelDirectory)>,
        ctx: &RequestContext,
    ) -> Result<bool, PageReconstructError> {
        if tag.relnode == 0 {
            return Err(PageReconstructError::Other(
@@ -611,17 +568,6 @@ impl Timeline {
        // fetch directory listing (old)

        let key = rel_dir_to_key(tag.spcnode, tag.dbnode);

        if let Some((cached_key, dir)) = deserialized_reldir_v1 {
            if cached_key == key {
                return Ok(dir.rels.contains(&(tag.relnode, tag.forknum)));
            } else if cfg!(test) || cfg!(feature = "testing") {
                panic!("cached reldir key mismatch: {cached_key} != {key}");
            } else {
                warn!("cached reldir key mismatch: {cached_key} != {key}");
            }
            // Fall back to reading the directory from the datadir.
        }
        let buf = version.get(self, key, ctx).await?;

        let dir = RelDirectory::des(&buf)?;
@@ -719,7 +665,7 @@ impl Timeline {

        let batches = keyspace.partition(
            self.get_shard_identity(),
            self.conf.max_get_vectored_keys.get() as u64 * BLCKSZ as u64,
            Timeline::MAX_GET_VECTORED_KEYS * BLCKSZ as u64,
        );

        let io_concurrency = IoConcurrency::spawn_from_conf(
@@ -959,7 +905,7 @@ impl Timeline {

        let batches = keyspace.partition(
            self.get_shard_identity(),
            self.conf.max_get_vectored_keys.get() as u64 * BLCKSZ as u64,
            Timeline::MAX_GET_VECTORED_KEYS * BLCKSZ as u64,
        );

        let io_concurrency = IoConcurrency::spawn_from_conf(

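The second partition argument is a byte budget: the key limit times the Postgres block size. Worked out with the values used in this diff:

// Back-of-envelope for the partition budget above, assuming the values
// this diff uses: MAX_GET_VECTORED_KEYS = 32 and BLCKSZ = 8192.
const MAX_GET_VECTORED_KEYS: u64 = 32;
const BLCKSZ: u64 = 8192;
const PARTITION_BYTES: u64 = MAX_GET_VECTORED_KEYS * BLCKSZ; // 262_144 bytes = 256 KiB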
@@ -383,7 +383,7 @@ pub struct TenantShard {

    l0_flush_global_state: L0FlushGlobalState,

    pub(crate) feature_resolver: FeatureResolver,
    feature_resolver: FeatureResolver,
}
impl std::fmt::Debug for TenantShard {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
@@ -5832,7 +5832,6 @@ pub(crate) mod harness {
        pub conf: &'static PageServerConf,
        pub tenant_conf: pageserver_api::models::TenantConfig,
        pub tenant_shard_id: TenantShardId,
        pub shard_identity: ShardIdentity,
        pub generation: Generation,
        pub shard: ShardIndex,
        pub remote_storage: GenericRemoteStorage,
@@ -5900,7 +5899,6 @@ pub(crate) mod harness {
                conf,
                tenant_conf,
                tenant_shard_id,
                shard_identity,
                generation,
                shard,
                remote_storage,
@@ -5962,7 +5960,8 @@ pub(crate) mod harness {
                    &ShardParameters::default(),
                ))
                .unwrap(),
                self.shard_identity,
                // This is a legacy/test code path: sharding isn't supported here.
                ShardIdentity::unsharded(),
                Some(walredo_mgr),
                self.tenant_shard_id,
                self.remote_storage.clone(),
@@ -6084,7 +6083,6 @@ mod tests {
    use timeline::compaction::{KeyHistoryRetention, KeyLogAtLsn};
    use timeline::{CompactOptions, DeltaLayerTestDesc, VersionedKeySpaceQuery};
    use utils::id::TenantId;
    use utils::shard::{ShardCount, ShardNumber};

    use super::*;
    use crate::DEFAULT_PG_VERSION;
@@ -7197,7 +7195,7 @@ mod tests {
            let end = desc
                .key_range
                .start
                .add(tenant.conf.max_get_vectored_keys.get() as u32);
                .add(Timeline::MAX_GET_VECTORED_KEYS.try_into().unwrap());
            reads.push(KeySpace {
                ranges: vec![start..end],
            });
@@ -9420,77 +9418,6 @@ mod tests {
        Ok(())
    }

    #[tokio::test]
    async fn test_failed_flush_should_not_update_disk_consistent_lsn() -> anyhow::Result<()> {
        //
        // Setup
        //
        let harness = TenantHarness::create_custom(
            "test_failed_flush_should_not_upload_disk_consistent_lsn",
            pageserver_api::models::TenantConfig::default(),
            TenantId::generate(),
            ShardIdentity::new(ShardNumber(0), ShardCount(4), ShardStripeSize(128)).unwrap(),
            Generation::new(1),
        )
        .await?;
        let (tenant, ctx) = harness.load().await;

        let timeline = tenant
            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
            .await?;
        assert_eq!(timeline.get_shard_identity().count, ShardCount(4));
        let mut writer = timeline.writer().await;
        writer
            .put(
                *TEST_KEY,
                Lsn(0x20),
                &Value::Image(test_img("foo at 0x20")),
                &ctx,
            )
            .await?;
        writer.finish_write(Lsn(0x20));
        drop(writer);
        timeline.freeze_and_flush().await.unwrap();

        timeline.remote_client.wait_completion().await.unwrap();
        let disk_consistent_lsn = timeline.get_disk_consistent_lsn();
        let remote_consistent_lsn = timeline.get_remote_consistent_lsn_projected();
        assert_eq!(Some(disk_consistent_lsn), remote_consistent_lsn);

        //
        // Test
        //

        let mut writer = timeline.writer().await;
        writer
            .put(
                *TEST_KEY,
                Lsn(0x30),
                &Value::Image(test_img("foo at 0x30")),
                &ctx,
            )
            .await?;
        writer.finish_write(Lsn(0x30));
        drop(writer);

        fail::cfg(
            "flush-layer-before-update-remote-consistent-lsn",
            "return()",
        )
        .unwrap();

        let flush_res = timeline.freeze_and_flush().await;
        // if flush failed, the disk/remote consistent LSN should not be updated
        assert!(flush_res.is_err());
        assert_eq!(disk_consistent_lsn, timeline.get_disk_consistent_lsn());
        assert_eq!(
            remote_consistent_lsn,
            timeline.get_remote_consistent_lsn_projected()
        );

        Ok(())
    }

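The test drives the error path via the `fail` crate failpoint added/removed elsewhere in this diff. A minimal, self-contained sketch of that mechanism (requires the crate's `failpoints` feature; `flush_step` is a stand-in for the pageserver flush path, not code from the diff):

fn flush_step() -> Result<(), String> {
    // The code under test declares a named failpoint; with the "return()"
    // action armed, the closure's value is returned from this function.
    fail::fail_point!("flush-layer-before-update-remote-consistent-lsn", |_| {
        Err("failpoint".to_string())
    });
    Ok(())
}

fn main() {
    let scenario = fail::FailScenario::setup();
    fail::cfg("flush-layer-before-update-remote-consistent-lsn", "return()").unwrap();
    assert!(flush_step().is_err()); // the armed failpoint forces the error branch
    scenario.teardown();
}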
#[cfg(feature = "testing")]
|
||||
#[tokio::test]
|
||||
async fn test_simple_bottom_most_compaction_deltas_1() -> anyhow::Result<()> {
|
||||
@@ -11260,11 +11187,11 @@ mod tests {
|
||||
let mut keyspaces_at_lsn: HashMap<Lsn, KeySpaceRandomAccum> = HashMap::default();
|
||||
let mut used_keys: HashSet<Key> = HashSet::default();
|
||||
|
||||
while used_keys.len() < tenant.conf.max_get_vectored_keys.get() {
|
||||
while used_keys.len() < Timeline::MAX_GET_VECTORED_KEYS as usize {
|
||||
let selected_lsn = interesting_lsns.choose(&mut random).expect("not empty");
|
||||
let mut selected_key = start_key.add(random.gen_range(0..KEY_DIMENSION_SIZE));
|
||||
|
||||
while used_keys.len() < tenant.conf.max_get_vectored_keys.get() {
|
||||
while used_keys.len() < Timeline::MAX_GET_VECTORED_KEYS as usize {
|
||||
if used_keys.contains(&selected_key)
|
||||
|| selected_key >= start_key.add(KEY_DIMENSION_SIZE)
|
||||
{
|
||||
|
||||
@@ -817,8 +817,8 @@ pub(crate) enum GetVectoredError {
    #[error("timeline shutting down")]
    Cancelled,

    #[error("requested too many keys: {0} > {1}")]
    Oversized(u64, u64),
    #[error("requested too many keys: {0} > {}", Timeline::MAX_GET_VECTORED_KEYS)]
    Oversized(u64),

    #[error("requested at invalid LSN: {0}")]
    InvalidLsn(Lsn),
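The TODO in the pgdatadir_mapping hunk above suggests preventing the Oversized error class via the type system. One way that could look — the names here are hypothetical, not part of the diff — is to validate the key count once at construction so the read path only ever receives a proven-small request:

struct BoundedKeyRequest(Vec<u64>); // key type simplified to u64 for the sketch

impl BoundedKeyRequest {
    const MAX_KEYS: usize = 32; // mirrors Timeline::MAX_GET_VECTORED_KEYS

    fn new(keys: Vec<u64>) -> Result<Self, usize> {
        if keys.len() > Self::MAX_KEYS {
            Err(keys.len()) // caller would map this to GetVectoredError::Oversized
        } else {
            Ok(Self(keys))
        }
    }
}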
@@ -950,18 +950,6 @@ pub(crate) enum WaitLsnError {
    Timeout(String),
}

impl From<WaitLsnError> for tonic::Status {
    fn from(err: WaitLsnError) -> Self {
        use tonic::Code;
        let code = match &err {
            WaitLsnError::Timeout(_) => Code::Internal,
            WaitLsnError::BadState(_) => Code::Internal,
            WaitLsnError::Shutdown => Code::Unavailable,
        };
        tonic::Status::new(code, err.to_string())
    }
}

// The impls below achieve cancellation mapping for errors.
// Perhaps there's a way of achieving this with less cruft.

@@ -1019,7 +1007,7 @@ impl From<GetVectoredError> for PageReconstructError {
        match e {
            GetVectoredError::Cancelled => PageReconstructError::Cancelled,
            GetVectoredError::InvalidLsn(_) => PageReconstructError::Other(anyhow!("Invalid LSN")),
            err @ GetVectoredError::Oversized(_, _) => PageReconstructError::Other(err.into()),
            err @ GetVectoredError::Oversized(_) => PageReconstructError::Other(err.into()),
            GetVectoredError::MissingKey(err) => PageReconstructError::MissingKey(err),
            GetVectoredError::GetReadyAncestorError(err) => PageReconstructError::from(err),
            GetVectoredError::Other(err) => PageReconstructError::Other(err),
@@ -1199,6 +1187,7 @@ impl Timeline {
        }
    }

    pub(crate) const MAX_GET_VECTORED_KEYS: u64 = 32;
    pub(crate) const LAYERS_VISITED_WARN_THRESHOLD: u32 = 100;

    /// Look up multiple page versions at a given LSN
@@ -1213,12 +1202,9 @@ impl Timeline {
    ) -> Result<BTreeMap<Key, Result<Bytes, PageReconstructError>>, GetVectoredError> {
        let total_keyspace = query.total_keyspace();

        let key_count = total_keyspace.total_raw_size();
        if key_count > self.conf.max_get_vectored_keys.get() {
            return Err(GetVectoredError::Oversized(
                key_count as u64,
                self.conf.max_get_vectored_keys.get() as u64,
            ));
        let key_count = total_keyspace.total_raw_size().try_into().unwrap();
        if key_count > Timeline::MAX_GET_VECTORED_KEYS {
            return Err(GetVectoredError::Oversized(key_count));
        }

        for range in &total_keyspace.ranges {
@@ -4781,10 +4767,7 @@ impl Timeline {
            || !flushed_to_lsn.is_valid()
        );

        if flushed_to_lsn < frozen_to_lsn
            && self.shard_identity.count.count() > 1
            && result.is_ok()
        {
        if flushed_to_lsn < frozen_to_lsn && self.shard_identity.count.count() > 1 {
            // If our layer flushes didn't carry disk_consistent_lsn up to the `to_lsn` advertised
            // to us via layer_flush_start_rx, then advance it here.
            //
@@ -4963,10 +4946,6 @@ impl Timeline {
            return Err(FlushLayerError::Cancelled);
        }

        fail_point!("flush-layer-before-update-remote-consistent-lsn", |_| {
            Err(FlushLayerError::Other(anyhow!("failpoint").into()))
        });

        let disk_consistent_lsn = Lsn(lsn_range.end.0 - 1);

        // The new on-disk layers are now in the layer map. We can remove the
@@ -5272,7 +5251,7 @@ impl Timeline {
            key = key.next();

            // Maybe flush `key_request_accum`
            if key_request_accum.raw_size() >= self.conf.max_get_vectored_keys.get() as u64
            if key_request_accum.raw_size() >= Timeline::MAX_GET_VECTORED_KEYS
                || (last_key_in_range && key_request_accum.raw_size() > 0)
            {
                let query =

@@ -106,8 +106,6 @@ pub async fn doit(
        );
    }

    tracing::info!("Import plan executed. Flushing remote changes and notifying storcon");

    timeline
        .remote_client
        .schedule_index_upload_for_file_changes()?;
@@ -201,8 +199,8 @@ async fn prepare_import(
        .await;
        match res {
            Ok(_) => break,
            Err(_err) => {
                info!("indefinitely waiting for pgdata to finish");
            Err(err) => {
                info!(?err, "indefinitely waiting for pgdata to finish");
                if tokio::time::timeout(std::time::Duration::from_secs(10), cancel.cancelled())
                    .await
                    .is_ok()

@@ -11,7 +11,19 @@
//! - => S3 as the source for the PGDATA instead of local filesystem
//!
//! TODOs before productionization:
//! - ChunkProcessingJob size / ImportJob::total_size does not account for sharding.
//!   => produced image layers likely too small.
//! - ChunkProcessingJob should cut up an ImportJob to hit exactly target image layer size.
//! - asserts / unwraps need to be replaced with errors
//! - don't trust remote objects will be small (=prevent OOMs in those cases)
//!   - limit all in-memory buffers in size, or download to disk and read from there
//! - limit task concurrency
//! - generally play nice with other tenants in the system
//!   - importbucket is a different bucket than main pageserver storage, so, should be fine wrt S3 rate limits
//!   - but concerns like network bandwidth, local disk write bandwidth, local disk capacity, etc
//! - integrate with layer eviction system
//! - audit for Tenant::cancel and Timeline::cancel responsivity
//! - audit for Tenant/Timeline gate holding (we spawn tokio tasks during this flow!)
//!
//! An incomplete set of TODOs from the Hackathon:
//! - version-specific CheckPointData (=> pgv abstraction, already exists for regular walingest)
@@ -32,7 +44,7 @@ use pageserver_api::key::{
    rel_dir_to_key, rel_size_to_key, relmap_file_key, slru_block_to_key, slru_dir_to_key,
    slru_segment_size_to_key,
};
use pageserver_api::keyspace::{ShardedRange, singleton_range};
use pageserver_api::keyspace::{contiguous_range_len, is_contiguous_range, singleton_range};
use pageserver_api::models::{ShardImportProgress, ShardImportProgressV1, ShardImportStatus};
use pageserver_api::reltag::{RelTag, SlruKind};
use pageserver_api::shard::ShardIdentity;
@@ -130,15 +142,7 @@ async fn run_v1(

    pausable_failpoint!("import-timeline-pre-execute-pausable");

    let jobs_count = import_progress.as_ref().map(|p| p.jobs);
    let start_from_job_idx = import_progress.map(|progress| progress.completed);

    tracing::info!(
        start_from_job_idx=?start_from_job_idx,
        jobs=?jobs_count,
        "Executing import plan"
    );

    plan.execute(timeline, start_from_job_idx, plan_hash, &import_config, ctx)
        .await
}
@@ -163,7 +167,6 @@ impl Planner {
    /// This function is and must remain pure: given the same input, it will generate the same import plan.
    async fn plan(mut self, import_config: &TimelineImportConfig) -> anyhow::Result<Plan> {
        let pgdata_lsn = Lsn(self.control_file.control_file_data().checkPoint).align();
        anyhow::ensure!(pgdata_lsn.is_valid());

        let datadir = PgDataDir::new(&self.storage).await?;

@@ -246,22 +249,14 @@ impl Planner {
        });

        // Assigns parts of key space to later parallel jobs
        // Note: The image layers produced here may have gaps, meaning,
        // there is not an image for each key in the layer's key range.
        // The read path stops traversal at the first image layer, regardless
        // of whether a base image has been found for a key or not.
        // (Concept of sparse image layers doesn't exist.)
        // This behavior is exactly right for the base image layers we're producing here.
        // But, since no other place in the code currently produces image layers with gaps,
        // it seems noteworthy.
        let mut last_end_key = Key::MIN;
        let mut current_chunk = Vec::new();
        let mut current_chunk_size: usize = 0;
        let mut jobs = Vec::new();
        for task in std::mem::take(&mut self.tasks).into_iter() {
            let task_size = task.total_size(&self.shard);
            let projected_chunk_size = current_chunk_size.saturating_add(task_size);
            if projected_chunk_size > import_config.import_job_soft_size_limit.into() {
            if current_chunk_size + task.total_size()
                > import_config.import_job_soft_size_limit.into()
            {
                let key_range = last_end_key..task.key_range().start;
                jobs.push(ChunkProcessingJob::new(
                    key_range.clone(),
@@ -271,7 +266,7 @@ impl Planner {
                last_end_key = key_range.end;
                current_chunk_size = 0;
            }
            current_chunk_size = current_chunk_size.saturating_add(task_size);
            current_chunk_size += task.total_size();
            current_chunk.push(task);
        }
        jobs.push(ChunkProcessingJob::new(
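The rewritten loop above sizes chunks with saturating_add so a task reporting usize::MAX (a non-contiguous range, see the ImportTask change below) cannot overflow the accumulator and simply forces a cut. The same logic in a standalone sketch, with sizes reduced to plain usizes:

fn chunk_tasks(task_sizes: &[usize], soft_limit: usize) -> Vec<Vec<usize>> {
    let mut chunks = Vec::new();
    let mut current = Vec::new();
    let mut current_size = 0usize;
    for &size in task_sizes {
        // Cut the chunk before adding a task that would exceed the soft limit.
        if current_size.saturating_add(size) > soft_limit && !current.is_empty() {
            chunks.push(std::mem::take(&mut current));
            current_size = 0;
        }
        current_size = current_size.saturating_add(size);
        current.push(size);
    }
    if !current.is_empty() {
        chunks.push(current);
    }
    chunks
}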
@@ -471,8 +466,6 @@ impl Plan {
            last_completed_job_idx = job_idx;

            if last_completed_job_idx % checkpoint_every == 0 {
                tracing::info!(last_completed_job_idx, jobs=%jobs_in_plan, "Checkpointing import status");

                let progress = ShardImportProgressV1 {
                    jobs: jobs_in_plan,
                    completed: last_completed_job_idx,
@@ -611,18 +604,18 @@ impl PgDataDirDb {
                };

                let path = datadir_path.join(rel_tag.to_segfile_name(segno));
                anyhow::ensure!(filesize % BLCKSZ as usize == 0);
                assert!(filesize % BLCKSZ as usize == 0); // TODO: this should result in an error
                let nblocks = filesize / BLCKSZ as usize;

                Ok(PgDataDirDbFile {
                PgDataDirDbFile {
                    path,
                    filesize,
                    rel_tag,
                    segno,
                    nblocks: Some(nblocks), // first non-cumulative sizes
                })
                }
            })
            .collect::<anyhow::Result<_, _>>()?;
            .collect();

        // Set cumulative sizes. Do all of that math here, so that later we could more easily
        // parallelize over segments and know with which segments we need to write relsize
@@ -657,22 +650,12 @@ impl PgDataDirDb {
trait ImportTask {
    fn key_range(&self) -> Range<Key>;

    fn total_size(&self, shard_identity: &ShardIdentity) -> usize {
        let range = ShardedRange::new(self.key_range(), shard_identity);
        let page_count = range.page_count();
        if page_count == u32::MAX {
            tracing::warn!(
                "Import task has non contiguous key range: {}..{}",
                self.key_range().start,
                self.key_range().end
            );

            // Tasks should operate on contiguous ranges. It is unexpected for
            // ranges to violate this assumption. Calling code handles this by mapping
            // any task on a non contiguous range to its own image layer.
            usize::MAX
    fn total_size(&self) -> usize {
        // TODO: revisit this
        if is_contiguous_range(&self.key_range()) {
            contiguous_range_len(&self.key_range()) as usize * 8192
        } else {
            page_count as usize * 8192
            u32::MAX as usize
        }
    }

@@ -770,8 +753,6 @@ impl ImportTask for ImportRelBlocksTask {
        layer_writer: &mut ImageLayerWriter,
        ctx: &RequestContext,
    ) -> anyhow::Result<usize> {
        const MAX_BYTE_RANGE_SIZE: usize = 4 * 1024 * 1024;

        debug!("Importing relation file");

        let (rel_tag, start_blk) = self.key_range.start.to_rel_block()?;
@@ -796,7 +777,7 @@ impl ImportTask for ImportRelBlocksTask {
                assert_eq!(key.len(), 1);
                assert!(!acc.is_empty());
                assert!(acc_end > acc_start);
                if acc_end == start && end - acc_start <= MAX_BYTE_RANGE_SIZE {
                if acc_end == start /* TODO additional max range check here, to limit memory consumption per task to X */ {
                    acc.push(key.pop().unwrap());
                    Ok((acc, acc_start, end))
                } else {
@@ -811,8 +792,8 @@ impl ImportTask for ImportRelBlocksTask {
                .get_range(&self.path, range_start.into_u64(), range_end.into_u64())
                .await?;
            let mut buf = Bytes::from(range_buf);
            // TODO: batched writes
            for key in keys {
                // The writer buffers writes internally
                let image = buf.split_to(8192);
                layer_writer.put_image(key, image, ctx).await?;
                nimages += 1;
@@ -865,9 +846,6 @@ impl ImportTask for ImportSlruBlocksTask {
        debug!("Importing SLRU segment file {}", self.path);
        let buf = self.storage.get(&self.path).await?;

        // TODO(vlad): Does timestamp to LSN work for imported timelines?
        // Probably not since we don't append the `xact_time` to it as in
        // [`WalIngest::ingest_xact_record`].
        let (kind, segno, start_blk) = self.key_range.start.to_slru_block()?;
        let (_kind, _segno, end_blk) = self.key_range.end.to_slru_block()?;
        let mut blknum = start_blk;

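The new MAX_BYTE_RANGE_SIZE check above resolves the inline TODO: adjacent block reads are still coalesced, but a merged range is capped at 4 MiB to bound per-task memory for the ranged GET. A self-contained sketch of the rule, with ranges reduced to (start, end) byte offsets:

const MAX_BYTE_RANGE_SIZE: usize = 4 * 1024 * 1024;

fn coalesce(ranges: &[(usize, usize)]) -> Vec<(usize, usize)> {
    let mut out: Vec<(usize, usize)> = Vec::new();
    for &(start, end) in ranges {
        match out.last_mut() {
            // Extend the accumulator only if contiguous and still small enough.
            Some((acc_start, acc_end))
                if *acc_end == start && end - *acc_start <= MAX_BYTE_RANGE_SIZE =>
            {
                *acc_end = end;
            }
            _ => out.push((start, end)),
        }
    }
    out
}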
@@ -6,7 +6,7 @@ use bytes::Bytes;
use postgres_ffi::ControlFileData;
use remote_storage::{
    Download, DownloadError, DownloadKind, DownloadOpts, GenericRemoteStorage, Listing,
    ListingObject, RemotePath, RemoteStorageConfig,
    ListingObject, RemotePath,
};
use serde::de::DeserializeOwned;
use tokio_util::sync::CancellationToken;
@@ -22,9 +22,11 @@ pub async fn new(
    location: &index_part_format::Location,
    cancel: CancellationToken,
) -> Result<RemoteStorageWrapper, anyhow::Error> {
    // Downloads should be reasonably sized. We do ranged reads for relblock raw data
    // and full reads for SLRU segments which are bounded by Postgres.
    let timeout = RemoteStorageConfig::DEFAULT_TIMEOUT;
    // FIXME: we probably want some timeout, and we might be able to assume the max file
    // size on S3 is 1GiB (postgres segment size). But the problem is that the individual
    // downloaders don't know enough about concurrent downloads to make a guess on the
    // expected bandwidth and resulting best timeout.
    let timeout = std::time::Duration::from_secs(24 * 60 * 60);
    let location_storage = match location {
        #[cfg(feature = "testing")]
        index_part_format::Location::LocalFs { path } => {
@@ -48,12 +50,9 @@ pub async fn new(
                    .import_pgdata_aws_endpoint_url
                    .clone()
                    .map(|url| url.to_string()), // by specifying None here, remote_storage/aws-sdk-rust will infer from env
                // This matches the default import job concurrency. This is managed
                // separately from the usual S3 client, but the concern here is bandwidth
                // usage.
                concurrency_limit: 128.try_into().unwrap(),
                max_keys_per_list_response: Some(1000),
                upload_storage_class: None, // irrelevant
                concurrency_limit: 100.try_into().unwrap(), // TODO: think about this
                max_keys_per_list_response: Some(1000),     // TODO: think about this
                upload_storage_class: None,                 // irrelevant
            },
            timeout,
        )

@@ -113,7 +113,7 @@ impl WalReceiver {
            }
            connection_manager_state.shutdown().await;
            *loop_status.write().unwrap() = None;
            info!("task exits");
            debug!("task exits");
        }
        .instrument(info_span!(parent: None, "wal_connection_manager", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), timeline_id = %timeline_id))
    });

@@ -297,7 +297,6 @@ pub(super) async fn handle_walreceiver_connection(
    let mut expected_wal_start = startpoint;
    while let Some(replication_message) = {
        select! {
            biased;
            _ = cancellation.cancelled() => {
                debug!("walreceiver interrupted");
                None

@@ -17,23 +17,35 @@ pub(super) async fn authenticate(
    config: &'static AuthenticationConfig,
    secret: AuthSecret,
) -> auth::Result<ComputeCredentials> {
    let flow = AuthFlow::new(client);
    let scram_keys = match secret {
        #[cfg(any(test, feature = "testing"))]
        AuthSecret::Md5(_) => {
            debug!("auth endpoint chooses MD5");
            return Err(auth::AuthError::MalformedPassword("MD5 not supported"));
            return Err(auth::AuthError::bad_auth_method("MD5"));
        }
        AuthSecret::Scram(secret) => {
            debug!("auth endpoint chooses SCRAM");
            let scram = auth::Scram(&secret, ctx);

            let auth_outcome = tokio::time::timeout(
                config.scram_protocol_timeout,
                AuthFlow::new(client, auth::Scram(&secret, ctx)).authenticate(),
                async {
                    flow.begin(scram).await.map_err(|error| {
                        warn!(?error, "error sending scram acknowledgement");
                        error
                    })?.authenticate().await.map_err(|error| {
                        warn!(?error, "error processing scram messages");
                        error
                    })
                }
            )
            .await
            .inspect_err(|_| warn!("error processing scram messages error = authentication timed out, execution time exceeded {} seconds", config.scram_protocol_timeout.as_secs()))
            .map_err(auth::AuthError::user_timeout)?
            .inspect_err(|error| warn!(?error, "error processing scram messages"))?;
            .map_err(|e| {
                warn!("error processing scram messages error = authentication timed out, execution time exceeded {} seconds", config.scram_protocol_timeout.as_secs());
                auth::AuthError::user_timeout(e)
            })??;

            let client_key = match auth_outcome {
                sasl::Outcome::Success(key) => key,

@@ -2,6 +2,7 @@ use std::fmt;

use async_trait::async_trait;
use postgres_client::config::SslMode;
use pq_proto::BeMessage as Be;
use thiserror::Error;
use tokio::io::{AsyncRead, AsyncWrite};
use tracing::{info, info_span};
@@ -15,7 +16,6 @@ use crate::context::RequestContext;
use crate::control_plane::client::cplane_proxy_v1;
use crate::control_plane::{self, CachedNodeInfo, NodeInfo};
use crate::error::{ReportableError, UserFacingError};
use crate::pqproto::BeMessage;
use crate::proxy::NeonOptions;
use crate::proxy::connect_compute::ComputeConnectBackend;
use crate::stream::PqStream;
@@ -154,13 +154,11 @@ async fn authenticate(

    // Give user a URL to spawn a new database.
    info!(parent: &span, "sending the auth URL to the user");
    client.write_message(BeMessage::AuthenticationOk);
    client.write_message(BeMessage::ParameterStatus {
        name: b"client_encoding",
        value: b"UTF8",
    });
    client.write_message(BeMessage::NoticeResponse(&greeting));
    client.flush().await?;
    client
        .write_message_noflush(&Be::AuthenticationOk)?
        .write_message_noflush(&Be::CLIENT_ENCODING)?
        .write_message(&Be::NoticeResponse(&greeting))
        .await?;

    // Wait for console response via control plane (see `mgmt`).
    info!(parent: &span, "waiting for console's reply...");
@@ -190,7 +188,7 @@ async fn authenticate(
        }
    }

    client.write_message(BeMessage::NoticeResponse("Connecting to database."));
    client.write_message_noflush(&Be::NoticeResponse("Connecting to database."))?;

    // This config should be self-contained, because we won't
    // take username or dbname from client's startup message.

@@ -24,25 +24,23 @@ pub(crate) async fn authenticate_cleartext(
    debug!("cleartext auth flow override is enabled, proceeding");
    ctx.set_auth_method(crate::context::AuthMethod::Cleartext);

    // pause the timer while we communicate with the client
    let paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);

    let ep = EndpointIdInt::from(&info.endpoint);

    let auth_flow = AuthFlow::new(
        client,
        auth::CleartextPassword {
    let auth_flow = AuthFlow::new(client)
        .begin(auth::CleartextPassword {
            secret,
            endpoint: ep,
            pool: config.thread_pool.clone(),
        },
    );
    let auth_outcome = {
        // pause the timer while we communicate with the client
        let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);

        // cleartext auth is only allowed to the ws/http protocol.
        // If we're here, we already received the password in the first message.
        // Scram protocol will be executed on the proxy side.
        auth_flow.authenticate().await?
    };
        })
        .await?;
    drop(paused);
    // cleartext auth is only allowed to the ws/http protocol.
    // If we're here, we already received the password in the first message.
    // Scram protocol will be executed on the proxy side.
    let auth_outcome = auth_flow.authenticate().await?;

    let keys = match auth_outcome {
        sasl::Outcome::Success(key) => key,
@@ -69,7 +67,9 @@ pub(crate) async fn password_hack_no_authentication(
    // pause the timer while we communicate with the client
    let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);

    let payload = AuthFlow::new(client, auth::PasswordHack)
    let payload = AuthFlow::new(client)
        .begin(auth::PasswordHack)
        .await?
        .get_password()
        .await?;

@@ -4,16 +4,19 @@ mod hacks;
pub mod jwt;
pub mod local;

use std::net::IpAddr;
use std::sync::Arc;

pub use console_redirect::ConsoleRedirectBackend;
pub(crate) use console_redirect::ConsoleRedirectError;
use ipnet::{Ipv4Net, Ipv6Net};
use local::LocalBackend;
use postgres_client::config::AuthKeys;
use serde::{Deserialize, Serialize};
use tokio::io::{AsyncRead, AsyncWrite};
use tracing::{debug, info};
use tracing::{debug, info, warn};

use crate::auth::credentials::check_peer_addr_is_in_list;
use crate::auth::{self, AuthError, ComputeUserInfoMaybeEndpoint, validate_password_and_exchange};
use crate::cache::Cached;
use crate::config::AuthenticationConfig;
@@ -21,14 +24,15 @@ use crate::context::RequestContext;
use crate::control_plane::client::ControlPlaneClient;
use crate::control_plane::errors::GetAuthInfoError;
use crate::control_plane::{
    self, AccessBlockerFlags, AuthSecret, CachedNodeInfo, ControlPlaneApi, EndpointAccessControl,
    RoleAccessControl,
    self, AccessBlockerFlags, AuthSecret, CachedAccessBlockerFlags, CachedAllowedIps,
    CachedAllowedVpcEndpointIds, CachedNodeInfo, CachedRoleSecret, ControlPlaneApi,
};
use crate::intern::EndpointIdInt;
use crate::pqproto::BeMessage;
use crate::metrics::Metrics;
use crate::protocol2::ConnectionInfoExtra;
use crate::proxy::NeonOptions;
use crate::proxy::connect_compute::ComputeConnectBackend;
use crate::rate_limiter::EndpointRateLimiter;
use crate::rate_limiter::{BucketRateLimiter, EndpointRateLimiter};
use crate::stream::Stream;
use crate::types::{EndpointCacheKey, EndpointId, RoleName};
use crate::{scram, stream};
@@ -131,16 +135,6 @@ impl<'a, T> Backend<'a, T> {
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'a, T, E> Backend<'a, Result<T, E>> {
|
||||
/// Very similar to [`std::option::Option::transpose`].
|
||||
/// This is most useful for error handling.
|
||||
pub(crate) fn transpose(self) -> Result<Backend<'a, T>, E> {
|
||||
match self {
|
||||
Self::ControlPlane(c, x) => x.map(|x| Backend::ControlPlane(c, x)),
|
||||
Self::Local(l) => Ok(Backend::Local(l)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct ComputeCredentials {
|
||||
pub(crate) info: ComputeUserInfo,
|
||||
@@ -194,6 +188,78 @@ impl TryFrom<ComputeUserInfoMaybeEndpoint> for ComputeUserInfo {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(PartialEq, PartialOrd, Hash, Eq, Ord, Debug, Copy, Clone)]
|
||||
pub struct MaskedIp(IpAddr);
|
||||
|
||||
impl MaskedIp {
|
||||
fn new(value: IpAddr, prefix: u8) -> Self {
|
||||
match value {
|
||||
IpAddr::V4(v4) => Self(IpAddr::V4(
|
||||
Ipv4Net::new(v4, prefix).map_or(v4, |x| x.trunc().addr()),
|
||||
)),
|
||||
IpAddr::V6(v6) => Self(IpAddr::V6(
|
||||
Ipv6Net::new(v6, prefix).map_or(v6, |x| x.trunc().addr()),
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This can't be just per IP because that would limit some PaaS that share IP addresses
|
||||
pub type AuthRateLimiter = BucketRateLimiter<(EndpointIdInt, MaskedIp)>;
|
||||
|
||||
impl AuthenticationConfig {
|
||||
pub(crate) fn check_rate_limit(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
secret: AuthSecret,
|
||||
endpoint: &EndpointId,
|
||||
is_cleartext: bool,
|
||||
) -> auth::Result<AuthSecret> {
|
||||
// we have validated the endpoint exists, so let's intern it.
|
||||
let endpoint_int = EndpointIdInt::from(endpoint.normalize());
|
||||
|
||||
// only count the full hash count if password hack or websocket flow.
|
||||
// in other words, if proxy needs to run the hashing
|
||||
let password_weight = if is_cleartext {
|
||||
match &secret {
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
AuthSecret::Md5(_) => 1,
|
||||
AuthSecret::Scram(s) => s.iterations + 1,
|
||||
}
|
||||
} else {
|
||||
// validating scram takes just 1 hmac_sha_256 operation.
|
||||
1
|
||||
};
|
||||
|
||||
let limit_not_exceeded = self.rate_limiter.check(
|
||||
(
|
||||
endpoint_int,
|
||||
MaskedIp::new(ctx.peer_addr(), self.rate_limit_ip_subnet),
|
||||
),
|
||||
password_weight,
|
||||
);
|
||||
|
||||
if !limit_not_exceeded {
|
||||
warn!(
|
||||
enabled = self.rate_limiter_enabled,
|
||||
"rate limiting authentication"
|
||||
);
|
||||
Metrics::get().proxy.requests_auth_rate_limits_total.inc();
|
||||
Metrics::get()
|
||||
.proxy
|
||||
.endpoints_auth_rate_limits
|
||||
.get_metric()
|
||||
.measure(endpoint);
|
||||
|
||||
if self.rate_limiter_enabled {
|
||||
return Err(auth::AuthError::too_many_connections());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(secret)
|
||||
}
|
||||
}
|
||||
|
||||
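MaskedIp truncates the peer address to a subnet before it becomes part of the rate-limit key, so adjacent addresses probing the same endpoint share one bucket. A worked example using the same ipnet calls as the implementation above (the concrete addresses are illustrative):

use std::net::IpAddr;
use ipnet::Ipv4Net;

fn main() {
    let a: IpAddr = "192.168.1.101".parse().unwrap();
    let b: IpAddr = "192.168.1.102".parse().unwrap();
    let mask = |ip: IpAddr, prefix: u8| match ip {
        // trunc() zeroes the host bits, leaving only the /prefix network part.
        IpAddr::V4(v4) => IpAddr::V4(Ipv4Net::new(v4, prefix).map_or(v4, |n| n.trunc().addr())),
        other => other, // IPv6 is handled analogously with Ipv6Net
    };
    // Both addresses fall in the same /30, so they yield the same key.
    assert_eq!(mask(a, 30), mask(b, 30)); // both truncate to 192.168.1.100
}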
/// True to its name, this function encapsulates our current auth trade-offs.
/// Here, we choose the appropriate auth flow based on circumstances.
///
@@ -222,27 +288,52 @@ async fn auth_quirks(

    debug!("fetching authentication info and allowlists");

    let access_controls = api
        .get_endpoint_access_control(ctx, &info.endpoint, &info.user)
        .await?;
    // check allowed list
    if config.ip_allowlist_check_enabled {
        let allowed_ips = api.get_allowed_ips(ctx, &info).await?;
        if !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips) {
            return Err(auth::AuthError::ip_address_not_allowed(ctx.peer_addr()));
        }
    }

    access_controls.check(
        ctx,
        config.ip_allowlist_check_enabled,
        config.is_vpc_acccess_proxy,
    )?;
    // check if a VPC endpoint ID is coming in and if yes, if it's allowed
    let access_blocks = api.get_block_public_or_vpc_access(ctx, &info).await?;
    if config.is_vpc_acccess_proxy {
        if access_blocks.vpc_access_blocked {
            return Err(AuthError::NetworkNotAllowed);
        }

        let endpoint = EndpointIdInt::from(&info.endpoint);
        let rate_limit_config = None;
        if !endpoint_rate_limiter.check(endpoint, rate_limit_config, 1) {
        let incoming_vpc_endpoint_id = match ctx.extra() {
            None => return Err(AuthError::MissingEndpointName),
            Some(ConnectionInfoExtra::Aws { vpce_id }) => vpce_id.to_string(),
            Some(ConnectionInfoExtra::Azure { link_id }) => link_id.to_string(),
        };
        let allowed_vpc_endpoint_ids = api.get_allowed_vpc_endpoint_ids(ctx, &info).await?;
        // TODO: For now an empty VPC endpoint ID list means all are allowed. We should replace that.
        if !allowed_vpc_endpoint_ids.is_empty()
            && !allowed_vpc_endpoint_ids.contains(&incoming_vpc_endpoint_id)
        {
            return Err(AuthError::vpc_endpoint_id_not_allowed(
                incoming_vpc_endpoint_id,
            ));
        }
    } else if access_blocks.public_access_blocked {
        return Err(AuthError::NetworkNotAllowed);
    }

    if !endpoint_rate_limiter.check(info.endpoint.clone().into(), 1) {
        return Err(AuthError::too_many_connections());
    }
    let role_access = api
        .get_role_access_control(ctx, &info.endpoint, &info.user)
        .await?;
    let cached_secret = api.get_role_secret(ctx, &info).await?;
    let (cached_entry, secret) = cached_secret.take_value();

    let secret = if let Some(secret) = role_access.secret {
        secret
    let secret = if let Some(secret) = secret {
        config.check_rate_limit(
            ctx,
            secret,
            &info.endpoint,
            unauthenticated_password.is_some() || allow_cleartext,
        )?
    } else {
        // If we don't have an authentication secret, we mock one to
        // prevent malicious probing (possible due to missing protocol steps).
@@ -263,7 +354,13 @@ async fn auth_quirks(
        .await
    {
        Ok(keys) => Ok(keys),
        Err(e) => Err(e),
        Err(e) => {
            if e.is_password_failed() {
                // The password could have been changed, so we invalidate the cache.
                cached_entry.invalidate();
            }
            Err(e)
        }
    }
}

@@ -290,7 +387,7 @@ async fn authenticate_with_secret(
    };

    // we have authenticated the password
    client.write_message(BeMessage::AuthenticationOk);
    client.write_message_noflush(&pq_proto::BeMessage::AuthenticationOk)?;

    return Ok(ComputeCredentials { info, keys });
}
@@ -308,71 +405,39 @@ async fn authenticate_with_secret(
    classic::authenticate(ctx, info, client, config, secret).await
}

impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> {
    /// Get username from the credentials.
    pub(crate) fn get_user(&self) -> &str {
        match self {
            Self::ControlPlane(_, user_info) => &user_info.user,
            Self::Local(_) => "local",
        }
    }

impl ControlPlaneClient {
    /// Authenticate the client via the requested backend, possibly using credentials.
    #[tracing::instrument(fields(allow_cleartext = allow_cleartext), skip_all)]
    pub(crate) async fn authenticate(
        self,
        &self,
        ctx: &RequestContext,
        client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
        user_info: ComputeUserInfoMaybeEndpoint,
        allow_cleartext: bool,
        config: &'static AuthenticationConfig,
        endpoint_rate_limiter: Arc<EndpointRateLimiter>,
    ) -> auth::Result<Backend<'a, ComputeCredentials>> {
        let res = match self {
            Self::ControlPlane(api, user_info) => {
                debug!(
                    user = &*user_info.user,
                    project = user_info.endpoint(),
                    "performing authentication using the console"
                );
    ) -> auth::Result<ComputeCredentials> {
        debug!(
            user = &*user_info.user,
            project = user_info.endpoint(),
            "performing authentication using the console"
        );

                let auth_res = auth_quirks(
                    ctx,
                    &*api,
                    user_info.clone(),
                    client,
                    allow_cleartext,
                    config,
                    endpoint_rate_limiter,
                )
                .await;
                match auth_res {
                    Ok(credentials) => Ok(Backend::ControlPlane(api, credentials)),
                    Err(e) => {
                        // The password could have been changed, so we invalidate the cache.
                        // We should only invalidate the cache if the TTL might have expired.
                        if e.is_password_failed() {
                            #[allow(irrefutable_let_patterns)]
                            if let ControlPlaneClient::ProxyV1(api) = &*api {
                                if let Some(ep) = &user_info.endpoint_id {
                                    api.caches
                                        .project_info
                                        .maybe_invalidate_role_secret(ep, &user_info.user);
                                }
                            }
                        }

                        Err(e)
                    }
                }
            }
            Self::Local(_) => {
                return Err(auth::AuthError::bad_auth_method("invalid for local proxy"));
            }
        };
        let credentials = auth_quirks(
            ctx,
            self,
            user_info,
            client,
            allow_cleartext,
            config,
            endpoint_rate_limiter,
        )
        .await?;

        // TODO: replace with some metric
        info!("user successfully authenticated");
        res

        Ok(credentials)
    }
}

@@ -380,30 +445,44 @@ impl Backend<'_, ComputeUserInfo> {
|
||||
pub(crate) async fn get_role_secret(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<RoleAccessControl, GetAuthInfoError> {
|
||||
) -> Result<CachedRoleSecret, GetAuthInfoError> {
|
||||
match self {
|
||||
Self::ControlPlane(api, user_info) => {
|
||||
api.get_role_access_control(ctx, &user_info.endpoint, &user_info.user)
|
||||
.await
|
||||
}
|
||||
Self::Local(_) => Ok(RoleAccessControl { secret: None }),
|
||||
Self::ControlPlane(api, user_info) => api.get_role_secret(ctx, user_info).await,
|
||||
Self::Local(_) => Ok(Cached::new_uncached(None)),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn get_endpoint_access_control(
|
||||
pub(crate) async fn get_allowed_ips(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<EndpointAccessControl, GetAuthInfoError> {
|
||||
) -> Result<CachedAllowedIps, GetAuthInfoError> {
|
||||
match self {
|
||||
Self::ControlPlane(api, user_info) => api.get_allowed_ips(ctx, user_info).await,
|
||||
Self::Local(_) => Ok(Cached::new_uncached(Arc::new(vec![]))),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn get_allowed_vpc_endpoint_ids(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<CachedAllowedVpcEndpointIds, GetAuthInfoError> {
|
||||
match self {
|
||||
Self::ControlPlane(api, user_info) => {
|
||||
api.get_endpoint_access_control(ctx, &user_info.endpoint, &user_info.user)
|
||||
.await
|
||||
api.get_allowed_vpc_endpoint_ids(ctx, user_info).await
|
||||
}
|
||||
Self::Local(_) => Ok(EndpointAccessControl {
|
||||
allowed_ips: Arc::new(vec![]),
|
||||
allowed_vpce: Arc::new(vec![]),
|
||||
flags: AccessBlockerFlags::default(),
|
||||
}),
|
||||
Self::Local(_) => Ok(Cached::new_uncached(Arc::new(vec![]))),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn get_block_public_or_vpc_access(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<CachedAccessBlockerFlags, GetAuthInfoError> {
|
||||
match self {
|
||||
Self::ControlPlane(api, user_info) => {
|
||||
api.get_block_public_or_vpc_access(ctx, user_info).await
|
||||
}
|
||||
Self::Local(_) => Ok(Cached::new_uncached(AccessBlockerFlags::default())),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -428,11 +507,32 @@ impl ComputeConnectBackend for Backend<'_, ComputeCredentials> {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ControlPlaneWakeCompute<'a> {
|
||||
pub cplane: &'a ControlPlaneClient,
|
||||
pub creds: ComputeCredentials,
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl ComputeConnectBackend for ControlPlaneWakeCompute<'_> {
|
||||
async fn wake_compute(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> {
|
||||
self.cplane.wake_compute(ctx, &self.creds.info).await
|
||||
}
|
||||
|
||||
fn get_keys(&self) -> &ComputeCredentialKeys {
|
||||
&self.creds.keys
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#![allow(clippy::unimplemented, clippy::unwrap_used)]
|
||||
|
||||
use std::net::IpAddr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use bytes::BytesMut;
|
||||
use control_plane::AuthSecret;
|
||||
@@ -442,17 +542,20 @@ mod tests {
    use postgres_protocol::message::backend::Message as PgMessage;
    use postgres_protocol::message::frontend;
    use tokio::io::{AsyncRead, AsyncReadExt, AsyncWriteExt};
    use tokio_util::task::TaskTracker;

    use super::auth_quirks;
    use super::jwt::JwkCache;
    use super::{AuthRateLimiter, auth_quirks};
    use crate::auth::backend::MaskedIp;
    use crate::auth::{ComputeUserInfoMaybeEndpoint, IpPattern};
    use crate::config::AuthenticationConfig;
    use crate::context::RequestContext;
    use crate::control_plane::{
        self, AccessBlockerFlags, CachedNodeInfo, EndpointAccessControl, RoleAccessControl,
        self, AccessBlockerFlags, CachedAccessBlockerFlags, CachedAllowedIps,
        CachedAllowedVpcEndpointIds, CachedNodeInfo, CachedRoleSecret,
    };
    use crate::proxy::NeonOptions;
    use crate::rate_limiter::EndpointRateLimiter;
    use crate::rate_limiter::{EndpointRateLimiter, RateBucketInfo};
    use crate::scram::ServerSecret;
    use crate::scram::threadpool::ThreadPool;
    use crate::stream::{PqStream, Stream};
@@ -465,34 +568,46 @@ mod tests {
    }

    impl control_plane::ControlPlaneApi for Auth {
        async fn get_role_access_control(
        async fn get_role_secret(
            &self,
            _ctx: &RequestContext,
            _endpoint: &crate::types::EndpointId,
            _role: &crate::types::RoleName,
        ) -> Result<RoleAccessControl, control_plane::errors::GetAuthInfoError> {
            Ok(RoleAccessControl {
                secret: Some(self.secret.clone()),
            })
            _user_info: &super::ComputeUserInfo,
        ) -> Result<CachedRoleSecret, control_plane::errors::GetAuthInfoError> {
            Ok(CachedRoleSecret::new_uncached(Some(self.secret.clone())))
        }

        async fn get_endpoint_access_control(
        async fn get_allowed_ips(
            &self,
            _ctx: &RequestContext,
            _endpoint: &crate::types::EndpointId,
            _role: &crate::types::RoleName,
        ) -> Result<EndpointAccessControl, control_plane::errors::GetAuthInfoError> {
            Ok(EndpointAccessControl {
                allowed_ips: Arc::new(self.ips.clone()),
                allowed_vpce: Arc::new(self.vpc_endpoint_ids.clone()),
                flags: self.access_blocker_flags,
            })
            _user_info: &super::ComputeUserInfo,
        ) -> Result<CachedAllowedIps, control_plane::errors::GetAuthInfoError> {
            Ok(CachedAllowedIps::new_uncached(Arc::new(self.ips.clone())))
        }

        async fn get_allowed_vpc_endpoint_ids(
            &self,
            _ctx: &RequestContext,
            _user_info: &super::ComputeUserInfo,
        ) -> Result<CachedAllowedVpcEndpointIds, control_plane::errors::GetAuthInfoError> {
            Ok(CachedAllowedVpcEndpointIds::new_uncached(Arc::new(
                self.vpc_endpoint_ids.clone(),
            )))
        }

        async fn get_block_public_or_vpc_access(
            &self,
            _ctx: &RequestContext,
            _user_info: &super::ComputeUserInfo,
        ) -> Result<CachedAccessBlockerFlags, control_plane::errors::GetAuthInfoError> {
            Ok(CachedAccessBlockerFlags::new_uncached(
                self.access_blocker_flags.clone(),
            ))
        }

        async fn get_endpoint_jwks(
            &self,
            _ctx: &RequestContext,
            _endpoint: &crate::types::EndpointId,
            _endpoint: crate::types::EndpointId,
        ) -> Result<Vec<super::jwt::AuthRule>, control_plane::errors::GetEndpointJwksError>
        {
            unimplemented!()
@@ -511,6 +626,9 @@ mod tests {
            jwks_cache: JwkCache::default(),
            thread_pool: ThreadPool::new(1),
            scram_protocol_timeout: std::time::Duration::from_secs(5),
            rate_limiter_enabled: true,
            rate_limiter: AuthRateLimiter::new(&RateBucketInfo::DEFAULT_AUTH_SET),
            rate_limit_ip_subnet: 64,
            ip_allowlist_check_enabled: true,
            is_vpc_acccess_proxy: false,
            is_auth_broker: false,
@@ -527,10 +645,55 @@ mod tests {
        }
    }

    #[test]
    fn masked_ip() {
        let ip_a = IpAddr::V4([127, 0, 0, 1].into());
        let ip_b = IpAddr::V4([127, 0, 0, 2].into());
        let ip_c = IpAddr::V4([192, 168, 1, 101].into());
        let ip_d = IpAddr::V4([192, 168, 1, 102].into());
        let ip_e = IpAddr::V6("abcd:abcd:abcd:abcd:abcd:abcd:abcd:abcd".parse().unwrap());
        let ip_f = IpAddr::V6("abcd:abcd:abcd:abcd:1234:abcd:abcd:abcd".parse().unwrap());

        assert_ne!(MaskedIp::new(ip_a, 64), MaskedIp::new(ip_b, 64));
        assert_ne!(MaskedIp::new(ip_a, 32), MaskedIp::new(ip_b, 32));
        assert_eq!(MaskedIp::new(ip_a, 30), MaskedIp::new(ip_b, 30));
        assert_eq!(MaskedIp::new(ip_c, 30), MaskedIp::new(ip_d, 30));

        assert_ne!(MaskedIp::new(ip_e, 128), MaskedIp::new(ip_f, 128));
        assert_eq!(MaskedIp::new(ip_e, 64), MaskedIp::new(ip_f, 64));
    }

    #[test]
    fn test_default_auth_rate_limit_set() {
        // these values used to exceed u32::MAX
        assert_eq!(
            RateBucketInfo::DEFAULT_AUTH_SET,
            [
                RateBucketInfo {
                    interval: Duration::from_secs(1),
                    max_rpi: 1000 * 4096,
                },
                RateBucketInfo {
                    interval: Duration::from_secs(60),
                    max_rpi: 600 * 4096 * 60,
                },
                RateBucketInfo {
                    interval: Duration::from_secs(600),
                    max_rpi: 300 * 4096 * 600,
                }
            ]
        );

        for x in RateBucketInfo::DEFAULT_AUTH_SET {
            let y = x.to_string().parse().unwrap();
            assert_eq!(x, y);
        }
    }

    #[tokio::test]
    async fn auth_quirks_scram() {
        let (mut client, server) = tokio::io::duplex(1024);
        let mut stream = PqStream::new_skip_handshake(Stream::from_raw(server));
        let mut stream = PqStream::new(Stream::from_raw(server), TaskTracker::new().token());

        let ctx = RequestContext::test();
        let api = Auth {
@@ -612,7 +775,7 @@ mod tests {
    #[tokio::test]
    async fn auth_quirks_cleartext() {
        let (mut client, server) = tokio::io::duplex(1024);
        let mut stream = PqStream::new_skip_handshake(Stream::from_raw(server));
        let mut stream = PqStream::new(Stream::from_raw(server), TaskTracker::new().token());

        let ctx = RequestContext::test();
        let api = Auth {
@@ -666,7 +829,7 @@ mod tests {
    #[tokio::test]
    async fn auth_quirks_password_hack() {
        let (mut client, server) = tokio::io::duplex(1024);
        let mut stream = PqStream::new_skip_handshake(Stream::from_raw(server));
        let mut stream = PqStream::new(Stream::from_raw(server), TaskTracker::new().token());

        let ctx = RequestContext::test();
        let api = Auth {

@@ -5,6 +5,7 @@ use std::net::IpAddr;
use std::str::FromStr;

use itertools::Itertools;
use pq_proto::StartupMessageParams;
use thiserror::Error;
use tracing::{debug, warn};

@@ -12,7 +13,6 @@ use crate::auth::password_hack::parse_endpoint_param;
use crate::context::RequestContext;
use crate::error::{ReportableError, UserFacingError};
use crate::metrics::{Metrics, SniGroup, SniKind};
use crate::pqproto::StartupMessageParams;
use crate::proxy::NeonOptions;
use crate::serverless::{AUTH_BROKER_SNI, SERVERLESS_DRIVER_SNI};
use crate::types::{EndpointId, RoleName};

@@ -1,8 +1,10 @@
//! Main authentication flow.

use std::io;
use std::sync::Arc;

use postgres_protocol::authentication::sasl::{SCRAM_SHA_256, SCRAM_SHA_256_PLUS};
use pq_proto::{BeAuthenticationSaslMessage, BeMessage, BeMessage as Be};
use tokio::io::{AsyncRead, AsyncWrite};
use tracing::info;

@@ -11,26 +13,35 @@ use super::{AuthError, PasswordHackPayload};
use crate::context::RequestContext;
use crate::control_plane::AuthSecret;
use crate::intern::EndpointIdInt;
use crate::pqproto::{BeAuthenticationSaslMessage, BeMessage};
use crate::sasl;
use crate::scram::threadpool::ThreadPool;
use crate::scram::{self};
use crate::stream::{PqStream, Stream};
use crate::tls::TlsServerEndPoint;

/// Every authentication selector is supposed to implement this trait.
pub(crate) trait AuthMethod {
    /// Any authentication selector should provide initial backend message
    /// containing auth method name and parameters, e.g. md5 salt.
    fn first_message(&self, channel_binding: bool) -> BeMessage<'_>;
}

/// Initial state of [`AuthFlow`].
pub(crate) struct Begin;

/// Use [SCRAM](crate::scram)-based auth in [`AuthFlow`].
pub(crate) struct Scram<'a>(
    pub(crate) &'a scram::ServerSecret,
    pub(crate) &'a RequestContext,
);

impl Scram<'_> {
impl AuthMethod for Scram<'_> {
    #[inline(always)]
    fn first_message(&self, channel_binding: bool) -> BeMessage<'_> {
        if channel_binding {
            BeMessage::AuthenticationSasl(BeAuthenticationSaslMessage::Methods(scram::METHODS))
            Be::AuthenticationSasl(BeAuthenticationSaslMessage::Methods(scram::METHODS))
        } else {
            BeMessage::AuthenticationSasl(BeAuthenticationSaslMessage::Methods(
            Be::AuthenticationSasl(BeAuthenticationSaslMessage::Methods(
                scram::METHODS_WITHOUT_PLUS,
            ))
        }
@@ -41,6 +52,13 @@ impl Scram<'_> {
|
||||
/// <https://github.com/neondatabase/cloud/issues/1620#issuecomment-1165332290>.
|
||||
pub(crate) struct PasswordHack;
|
||||
|
||||
impl AuthMethod for PasswordHack {
|
||||
#[inline(always)]
|
||||
fn first_message(&self, _channel_binding: bool) -> BeMessage<'_> {
|
||||
Be::AuthenticationCleartextPassword
|
||||
}
|
||||
}
|
||||
|
||||
/// Use clear-text password auth called `password` in docs
|
||||
/// <https://www.postgresql.org/docs/current/auth-password.html>
|
||||
pub(crate) struct CleartextPassword {
|
||||
@@ -49,37 +67,53 @@ pub(crate) struct CleartextPassword {
|
||||
pub(crate) secret: AuthSecret,
|
||||
}
|
||||
|
||||
impl AuthMethod for CleartextPassword {
|
||||
#[inline(always)]
|
||||
fn first_message(&self, _channel_binding: bool) -> BeMessage<'_> {
|
||||
Be::AuthenticationCleartextPassword
|
||||
}
|
||||
}
|
||||
|
||||
/// This wrapper for [`PqStream`] performs client authentication.
|
||||
#[must_use]
|
||||
pub(crate) struct AuthFlow<'a, S, State> {
|
||||
/// The underlying stream which implements libpq's protocol.
|
||||
stream: &'a mut PqStream<Stream<S>>,
|
||||
/// State might contain ancillary data.
|
||||
/// State might contain ancillary data (see [`Self::begin`]).
|
||||
state: State,
|
||||
tls_server_end_point: TlsServerEndPoint,
|
||||
}
|
||||
|
||||
/// Initial state of the stream wrapper.
|
||||
impl<'a, S: AsyncRead + AsyncWrite + Unpin, M> AuthFlow<'a, S, M> {
|
||||
impl<'a, S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'a, S, Begin> {
|
||||
/// Create a new wrapper for client authentication.
|
||||
pub(crate) fn new(stream: &'a mut PqStream<Stream<S>>, method: M) -> Self {
|
||||
pub(crate) fn new(stream: &'a mut PqStream<Stream<S>>) -> Self {
|
||||
let tls_server_end_point = stream.get_ref().tls_server_end_point();
|
||||
|
||||
Self {
|
||||
stream,
|
||||
state: method,
|
||||
state: Begin,
|
||||
tls_server_end_point,
|
||||
}
|
||||
}
|
||||
|
||||
/// Move to the next step by sending auth method's name & params to client.
|
||||
pub(crate) async fn begin<M: AuthMethod>(self, method: M) -> io::Result<AuthFlow<'a, S, M>> {
|
||||
self.stream
|
||||
.write_message(&method.first_message(self.tls_server_end_point.supported()))
|
||||
.await?;
|
||||
|
||||
Ok(AuthFlow {
|
||||
stream: self.stream,
|
||||
state: method,
|
||||
tls_server_end_point: self.tls_server_end_point,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, PasswordHack> {
|
||||
/// Perform user authentication. Raise an error in case authentication failed.
|
||||
pub(crate) async fn get_password(self) -> super::Result<PasswordHackPayload> {
|
||||
self.stream
|
||||
.write_message(BeMessage::AuthenticationCleartextPassword);
|
||||
self.stream.flush().await?;
|
||||
|
||||
let msg = self.stream.read_password_message().await?;
|
||||
let password = msg
|
||||
.strip_suffix(&[0])
|
||||
@@ -99,10 +133,6 @@ impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, PasswordHack> {
|
||||
impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, CleartextPassword> {
|
||||
/// Perform user authentication. Raise an error in case authentication failed.
|
||||
pub(crate) async fn authenticate(self) -> super::Result<sasl::Outcome<ComputeCredentialKeys>> {
|
||||
self.stream
|
||||
.write_message(BeMessage::AuthenticationCleartextPassword);
|
||||
self.stream.flush().await?;
|
||||
|
||||
let msg = self.stream.read_password_message().await?;
|
||||
let password = msg
|
||||
.strip_suffix(&[0])
|
||||
@@ -117,7 +147,7 @@ impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, CleartextPassword> {
|
||||
.await?;
|
||||
|
||||
if let sasl::Outcome::Success(_) = &outcome {
|
||||
self.stream.write_message(BeMessage::AuthenticationOk);
|
||||
self.stream.write_message_noflush(&Be::AuthenticationOk)?;
|
||||
}
|
||||
|
||||
Ok(outcome)
|
||||
@@ -129,36 +159,42 @@ impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, Scram<'_>> {
|
||||
/// Perform user authentication. Raise an error in case authentication failed.
|
||||
pub(crate) async fn authenticate(self) -> super::Result<sasl::Outcome<scram::ScramKey>> {
|
||||
let Scram(secret, ctx) = self.state;
|
||||
let channel_binding = self.tls_server_end_point;
|
||||
|
||||
// send sasl message.
|
||||
{
|
||||
// pause the timer while we communicate with the client
|
||||
let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
|
||||
// pause the timer while we communicate with the client
|
||||
let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
|
||||
|
||||
let sasl = self.state.first_message(channel_binding.supported());
|
||||
self.stream.write_message(sasl);
|
||||
self.stream.flush().await?;
|
||||
// Initial client message contains the chosen auth method's name.
|
||||
let msg = self.stream.read_password_message().await?;
|
||||
let sasl = sasl::FirstMessage::parse(&msg)
|
||||
.ok_or(AuthError::MalformedPassword("bad sasl message"))?;
|
||||
|
||||
// Currently, the only supported SASL method is SCRAM.
|
||||
if !scram::METHODS.contains(&sasl.method) {
|
||||
return Err(super::AuthError::bad_auth_method(sasl.method));
|
||||
}
|
||||
|
||||
// complete sasl handshake.
|
||||
sasl::authenticate(ctx, self.stream, |method| {
|
||||
// Currently, the only supported SASL method is SCRAM.
|
||||
match method {
|
||||
SCRAM_SHA_256 => ctx.set_auth_method(crate::context::AuthMethod::ScramSha256),
|
||||
SCRAM_SHA_256_PLUS => {
|
||||
ctx.set_auth_method(crate::context::AuthMethod::ScramSha256Plus);
|
||||
}
|
||||
method => return Err(sasl::Error::BadAuthMethod(method.into())),
|
||||
}
|
||||
match sasl.method {
|
||||
SCRAM_SHA_256 => ctx.set_auth_method(crate::context::AuthMethod::ScramSha256),
|
||||
SCRAM_SHA_256_PLUS => ctx.set_auth_method(crate::context::AuthMethod::ScramSha256Plus),
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// TODO: make this a metric instead
|
||||
info!("client chooses {}", method);
|
||||
// TODO: make this a metric instead
|
||||
info!("client chooses {}", sasl.method);
|
||||
|
||||
Ok(scram::Exchange::new(secret, rand::random, channel_binding))
|
||||
})
|
||||
.await
|
||||
.map_err(AuthError::Sasl)
|
||||
let outcome = sasl::SaslStream::new(self.stream, sasl.message)
|
||||
.authenticate(scram::Exchange::new(
|
||||
secret,
|
||||
rand::random,
|
||||
self.tls_server_end_point,
|
||||
))
|
||||
.await?;
|
||||
|
||||
if let sasl::Outcome::Success(_) = &outcome {
|
||||
self.stream.write_message_noflush(&Be::AuthenticationOk)?;
|
||||
}
|
||||
|
||||
Ok(outcome)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
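AuthFlow above is a typestate machine: `new` produces the `Begin` state, `begin(method)` consumes it and returns a flow parameterized by the chosen method, and only that parameterized state exposes the matching `authenticate`/`get_password`. A stripped-down sketch of the pattern (names here are illustrative, not the proxy's API):

    // States are plain types; methods on one state consume `self`
    // and return the next state, so steps can't be skipped or replayed.
    struct Begin;
    struct Scram;

    struct AuthFlow<State> {
        state: State,
    }

    impl AuthFlow<Begin> {
        fn new() -> Self {
            AuthFlow { state: Begin }
        }

        // Consuming `self` guarantees the Begin state can't be reused.
        fn begin(self, method: Scram) -> AuthFlow<Scram> {
            AuthFlow { state: method }
        }
    }

    impl AuthFlow<Scram> {
        // Only reachable after `begin` has run.
        fn authenticate(self) -> Result<(), ()> {
            Ok(())
        }
    }

    fn main() {
        let flow = AuthFlow::new().begin(Scram);
        flow.authenticate().unwrap();
    }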
@@ -1,7 +1,7 @@
//! Client authentication mechanisms.

pub mod backend;
pub use backend::Backend;
pub use backend::{Backend, ControlPlaneWakeCompute};

mod credentials;
pub(crate) use credentials::{
@@ -32,7 +32,9 @@ use crate::ext::TaskExt;
use crate::http::health_server::AppMetrics;
use crate::intern::RoleNameInt;
use crate::metrics::{Metrics, ThreadPoolMetrics};
use crate::rate_limiter::{EndpointRateLimiter, LeakyBucketConfig, RateBucketInfo};
use crate::rate_limiter::{
BucketRateLimiter, EndpointRateLimiter, LeakyBucketConfig, RateBucketInfo,
};
use crate::scram::threadpool::ThreadPool;
use crate::serverless::cancel_set::CancelSet;
use crate::serverless::{self, GlobalConnPoolOptions};
@@ -67,6 +69,15 @@ struct LocalProxyCliArgs {
/// Can be given multiple times for different bucket sizes.
#[clap(long, default_values_t = RateBucketInfo::DEFAULT_ENDPOINT_SET)]
user_rps_limit: Vec<RateBucketInfo>,
/// Whether the auth rate limiter actually takes effect (for testing)
#[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
auth_rate_limit_enabled: bool,
/// Authentication rate limiter max number of hashes per second.
#[clap(long, default_values_t = RateBucketInfo::DEFAULT_AUTH_SET)]
auth_rate_limit: Vec<RateBucketInfo>,
/// The IP subnet to use when considering whether two IP addresses are considered the same.
#[clap(long, default_value_t = 64)]
auth_rate_limit_ip_subnet: u8,
/// Whether to retry the connection to the compute node
#[clap(long, default_value = config::RetryConfig::CONNECT_TO_COMPUTE_DEFAULT_VALUES)]
connect_to_compute_retry: String,
@@ -271,6 +282,9 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
jwks_cache: JwkCache::default(),
thread_pool: ThreadPool::new(0),
scram_protocol_timeout: Duration::from_secs(10),
rate_limiter_enabled: false,
rate_limiter: BucketRateLimiter::new(vec![]),
rate_limit_ip_subnet: 64,
ip_allowlist_check_enabled: true,
is_vpc_acccess_proxy: false,
is_auth_broker: false,
@@ -4,9 +4,8 @@
//! This allows connecting to pods/services running in the same Kubernetes cluster from
//! the outside. Similar to an ingress controller for HTTPS.

use std::net::SocketAddr;
use std::path::Path;
use std::sync::Arc;
use std::{net::SocketAddr, sync::Arc};

use anyhow::{Context, anyhow, bail, ensure};
use clap::Arg;
@@ -18,20 +17,18 @@ use rustls::pki_types::{DnsName, PrivateKeyDer};
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
use tokio::net::TcpListener;
use tokio_rustls::TlsConnector;
use tokio_rustls::server::TlsStream;
use tokio_util::sync::CancellationToken;
use tokio_util::task::task_tracker::TaskTrackerToken;
use tracing::{Instrument, error, info};
use utils::project_git_version;
use utils::sentry_init::init_sentry;

use crate::context::RequestContext;
use crate::metrics::{Metrics, ThreadPoolMetrics};
use crate::pqproto::FeStartupPacket;
use crate::protocol2::ConnectionInfo;
use crate::proxy::{
ErrorSource, TlsRequired, copy_bidirectional_client_compute, run_until_cancelled,
};
use crate::proxy::{ErrorSource, copy_bidirectional_client_compute, run_until_cancelled};
use crate::stream::{PqStream, Stream};
use crate::tls::TlsServerEndPoint;

project_git_version!(GIT_VERSION);

@@ -88,7 +85,7 @@ pub async fn run() -> anyhow::Result<()> {
.parse()?;

// Configure TLS
let tls_config = match (
let (tls_config, tls_server_end_point): (Arc<rustls::ServerConfig>, TlsServerEndPoint) = match (
args.get_one::<String>("tls-key"),
args.get_one::<String>("tls-cert"),
) {
@@ -121,6 +118,7 @@ pub async fn run() -> anyhow::Result<()> {
dest.clone(),
tls_config.clone(),
None,
tls_server_end_point,
proxy_listener,
cancellation_token.clone(),
))
@@ -130,6 +128,7 @@ pub async fn run() -> anyhow::Result<()> {
dest,
tls_config,
Some(compute_tls_config),
tls_server_end_point,
proxy_listener_compute_tls,
cancellation_token.clone(),
))
@@ -156,7 +155,7 @@ pub async fn run() -> anyhow::Result<()> {
pub(super) fn parse_tls(
key_path: &Path,
cert_path: &Path,
) -> anyhow::Result<Arc<rustls::ServerConfig>> {
) -> anyhow::Result<(Arc<rustls::ServerConfig>, TlsServerEndPoint)> {
let key = {
let key_bytes = std::fs::read(key_path).context("TLS key file")?;

@@ -189,6 +188,10 @@ pub(super) fn parse_tls(
})?
};

// needed for channel bindings
let first_cert = cert_chain.first().context("missing certificate")?;
let tls_server_end_point = TlsServerEndPoint::new(first_cert)?;

let tls_config =
rustls::ServerConfig::builder_with_provider(Arc::new(ring::default_provider()))
.with_protocol_versions(&[&rustls::version::TLS13, &rustls::version::TLS12])
@@ -197,13 +200,14 @@ pub(super) fn parse_tls(
.with_single_cert(cert_chain, key)?
.into();

Ok(tls_config)
Ok((tls_config, tls_server_end_point))
}
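For background on the "needed for channel bindings" step: the tls-server-end-point channel-binding data derived from the first certificate is defined by RFC 5929 as a digest of the DER-encoded leaf certificate, using the hash from the certificate's signature algorithm, with MD5 and SHA-1 upgraded to SHA-256. A hypothetical sketch using the ring crate, assuming SHA-256 applies (real code must inspect the signature algorithm first):

    // Sketch only: compute RFC 5929 tls-server-end-point data for a DER cert,
    // assuming the certificate is signed with a SHA-256-based algorithm.
    use ring::digest::{digest, SHA256};

    fn tls_server_end_point(der_cert: &[u8]) -> Vec<u8> {
        digest(&SHA256, der_cert).as_ref().to_vec()
    }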
pub(super) async fn task_main(
dest_suffix: Arc<String>,
tls_config: Arc<rustls::ServerConfig>,
compute_tls_config: Option<Arc<rustls::ClientConfig>>,
tls_server_end_point: TlsServerEndPoint,
listener: tokio::net::TcpListener,
cancellation_token: CancellationToken,
) -> anyhow::Result<()> {
@@ -223,7 +227,8 @@ pub(super) async fn task_main(
let dest_suffix = Arc::clone(&dest_suffix);
let compute_tls_config = compute_tls_config.clone();

connections.spawn(
let tracker = connections.token();
tokio::spawn(
async move {
socket
.set_nodelay(true)
@@ -239,7 +244,16 @@ pub(super) async fn task_main(
crate::metrics::Protocol::SniRouter,
"sni",
);
handle_client(ctx, dest_suffix, tls_config, compute_tls_config, socket).await
handle_client(
ctx,
dest_suffix,
tls_config,
compute_tls_config,
tls_server_end_point,
socket,
tracker,
)
.await
}
.unwrap_or_else(|e| {
// Acknowledge that the task has finished with an error.
@@ -258,26 +272,59 @@ pub(super) async fn task_main(
Ok(())
}
const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)";

async fn ssl_handshake<S: AsyncRead + AsyncWrite + Unpin>(
ctx: &RequestContext,
raw_stream: S,
tracker: TaskTrackerToken,
tls_config: Arc<rustls::ServerConfig>,
) -> anyhow::Result<TlsStream<S>> {
let (mut stream, msg) = PqStream::parse_startup(Stream::from_raw(raw_stream)).await?;
match msg {
FeStartupPacket::SslRequest { direct: None } => {
let raw = stream.accept_tls().await?;
tls_server_end_point: TlsServerEndPoint,
) -> anyhow::Result<(Stream<S>, TaskTrackerToken)> {
let mut stream = PqStream::new(Stream::from_raw(raw_stream), tracker);

Ok(raw
.upgrade(tls_config, !ctx.has_private_peer_addr())
.await?)
let msg = stream.read_startup_packet().await?;
use pq_proto::FeStartupPacket::SslRequest;

match msg {
SslRequest { direct: false } => {
stream
.write_message(&pq_proto::BeMessage::EncryptionResponse(true))
.await?;

// Upgrade raw stream into a secure TLS-backed stream.
// NOTE: We've consumed `tls`; this fact will be used later.

let (raw, read_buf, tracker) = stream.into_inner();
// TODO: Normally, client doesn't send any data before
// server says TLS handshake is ok and read_buf is empty.
// However, you could imagine pipelining of postgres
// SSLRequest + TLS ClientHello in one hunk similar to
// pipelining in our node js driver. We should probably
// support that by chaining read_buf with the stream.
if !read_buf.is_empty() {
bail!("data is sent before server replied with EncryptionResponse");
}

Ok((
Stream::Tls {
tls: Box::new(
raw.upgrade(tls_config, !ctx.has_private_peer_addr())
.await?,
),
tls_server_end_point,
},
tracker,
))
}
unexpected => {
info!(
?unexpected,
"unexpected startup packet, rejecting connection"
);
Err(stream.throw_error(TlsRequired, None).await)?
stream
.throw_error_str(ERR_INSECURE_CONNECTION, crate::error::ErrorKind::User, None)
.await?
}
}
}
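The SslRequest packet matched above has a fixed wire form in the Postgres protocol: a 4-byte length of 8 followed by the magic code 80877103, to which the server answers a single byte, 'S' (proceed with the TLS handshake) or 'N'. A self-contained sketch of that dance over a raw socket, separate from the PqStream machinery used here:

    // Minimal sketch of the SSLRequest exchange, assuming a raw tokio TcpStream.
    use tokio::io::{AsyncReadExt, AsyncWriteExt};
    use tokio::net::TcpStream;

    const SSL_REQUEST_CODE: u32 = 80877103; // (1234 << 16) | 5679, per the Postgres protocol

    async fn expect_ssl_request(stream: &mut TcpStream) -> std::io::Result<bool> {
        // Both fields are big-endian; the length includes itself.
        let len = stream.read_u32().await?;
        let code = stream.read_u32().await?;
        if len == 8 && code == SSL_REQUEST_CODE {
            // 'S' tells the client to start the TLS ClientHello next.
            stream.write_all(b"S").await?;
            Ok(true)
        } else {
            // Anything else: refuse TLS and let the caller report an error.
            stream.write_all(b"N").await?;
            Ok(false)
        }
    }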
@@ -287,18 +334,17 @@ async fn handle_client(
dest_suffix: Arc<String>,
tls_config: Arc<rustls::ServerConfig>,
compute_tls_config: Option<Arc<rustls::ClientConfig>>,
tls_server_end_point: TlsServerEndPoint,
stream: impl AsyncRead + AsyncWrite + Unpin,
tracker: TaskTrackerToken,
) -> anyhow::Result<()> {
let mut tls_stream = ssl_handshake(&ctx, stream, tls_config).await?;
let (mut tls_stream, _tracker) =
ssl_handshake(&ctx, stream, tracker, tls_config, tls_server_end_point).await?;

// Cut off first part of the SNI domain
// We receive required destination details in the format of
// `{k8s_service_name}--{k8s_namespace}--{port}.non-sni-domain`
let sni = tls_stream
.get_ref()
.1
.server_name()
.ok_or(anyhow!("SNI missing"))?;
let sni = tls_stream.sni_hostname().ok_or(anyhow!("SNI missing"))?;
let dest: Vec<&str> = sni
.split_once('.')
.context("invalid SNI")?
@@ -20,7 +20,7 @@ use utils::sentry_init::init_sentry;
use utils::{project_build_tag, project_git_version};

use crate::auth::backend::jwt::JwkCache;
use crate::auth::backend::{ConsoleRedirectBackend, MaybeOwned};
use crate::auth::backend::{AuthRateLimiter, ConsoleRedirectBackend, MaybeOwned};
use crate::cancellation::{CancellationHandler, handle_cancel_messages};
use crate::config::{
self, AuthenticationConfig, CacheOptions, ComputeConfig, HttpConfig, ProjectInfoCacheOptions,
@@ -29,7 +29,9 @@ use crate::config::{
use crate::context::parquet::ParquetUploadArgs;
use crate::http::health_server::AppMetrics;
use crate::metrics::Metrics;
use crate::rate_limiter::{EndpointRateLimiter, RateBucketInfo, WakeComputeRateLimiter};
use crate::rate_limiter::{
EndpointRateLimiter, LeakyBucketConfig, RateBucketInfo, WakeComputeRateLimiter,
};
use crate::redis::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
use crate::redis::kv_ops::RedisKVClient;
use crate::redis::{elasticache, notifications};
@@ -152,6 +154,15 @@ struct ProxyCliArgs {
/// Wake compute rate limiter max number of requests per second.
#[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)]
wake_compute_limit: Vec<RateBucketInfo>,
/// Whether the auth rate limiter actually takes effect (for testing)
#[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
auth_rate_limit_enabled: bool,
/// Authentication rate limiter max number of hashes per second.
#[clap(long, default_values_t = RateBucketInfo::DEFAULT_AUTH_SET)]
auth_rate_limit: Vec<RateBucketInfo>,
/// The IP subnet to use when considering whether two IP addresses are considered the same.
#[clap(long, default_value_t = 64)]
auth_rate_limit_ip_subnet: u8,
/// Redis rate limiter max number of requests per second.
#[clap(long, default_values_t = RateBucketInfo::DEFAULT_REDIS_SET)]
redis_rps_limit: Vec<RateBucketInfo>,
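The --auth-rate-limit-enabled flag above combines clap's BoolishValueParser, which accepts the usual boolean spellings (true/false, yes/no, on/off, 1/0), with ArgAction::Set, which requires an explicit value rather than treating the flag's presence as true. A small standalone example of that flag style (the struct and flag names are illustrative; requires clap with the derive feature):

    use clap::Parser;

    #[derive(Parser)]
    struct Args {
        // Accepts e.g. --enabled=true, --enabled yes, --enabled 0.
        #[clap(long, default_value_t = false,
               value_parser = clap::builder::BoolishValueParser::new(),
               action = clap::ArgAction::Set)]
        enabled: bool,
    }

    fn main() {
        let args = Args::parse_from(["prog", "--enabled", "yes"]);
        assert!(args.enabled);
    }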
@@ -399,9 +410,22 @@ pub async fn run() -> anyhow::Result<()> {
Some(tx_cancel),
));

// bit of a hack - find the min rps and max rps supported and turn it into
// leaky bucket config instead
let max = args
.endpoint_rps_limit
.iter()
.map(|x| x.rps())
.max_by(f64::total_cmp)
.unwrap_or(EndpointRateLimiter::DEFAULT.max);
let rps = args
.endpoint_rps_limit
.iter()
.map(|x| x.rps())
.min_by(f64::total_cmp)
.unwrap_or(EndpointRateLimiter::DEFAULT.rps);
let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new_with_shards(
RateBucketInfo::to_leaky_bucket(&args.endpoint_rps_limit)
.unwrap_or(EndpointRateLimiter::DEFAULT),
LeakyBucketConfig { rps, max },
64,
));
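The hunk above collapses a list of rate buckets into one leaky-bucket configuration: the smallest requests-per-second value becomes the sustained rate, the largest becomes the burst capacity. A sketch of that reduction over plain f64 values (a stand-in for the real RateBucketInfo type):

    // Collapse several rps limits into one (sustained rps, burst max) pair;
    // `default` is used when no buckets were configured.
    fn to_leaky_bucket(rps_values: &[f64], default: (f64, f64)) -> (f64, f64) {
        let rps = rps_values.iter().copied().min_by(f64::total_cmp).unwrap_or(default.0);
        let max = rps_values.iter().copied().max_by(f64::total_cmp).unwrap_or(default.1);
        (rps, max)
    }

    fn main() {
        assert_eq!(to_leaky_bucket(&[100.0, 300.0, 200.0], (10.0, 20.0)), (100.0, 300.0));
    }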
@@ -452,7 +476,8 @@ pub async fn run() -> anyhow::Result<()> {
let key_path = args.tls_key.expect("already asserted it is set");
let cert_path = args.tls_cert.expect("already asserted it is set");

let tls_config = super::pg_sni_router::parse_tls(&key_path, &cert_path)?;
let (tls_config, tls_server_end_point) =
super::pg_sni_router::parse_tls(&key_path, &cert_path)?;

let dest = Arc::new(dest);

@@ -460,6 +485,7 @@ pub async fn run() -> anyhow::Result<()> {
dest.clone(),
tls_config.clone(),
None,
tls_server_end_point,
listen,
cancellation_token.clone(),
));
@@ -468,6 +494,7 @@ pub async fn run() -> anyhow::Result<()> {
dest,
tls_config,
Some(config.connect_to_compute.tls.clone()),
tls_server_end_point,
listen_tls,
cancellation_token.clone(),
));
@@ -654,6 +681,9 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
jwks_cache: JwkCache::default(),
thread_pool,
scram_protocol_timeout: args.scram_protocol_timeout,
rate_limiter_enabled: args.auth_rate_limit_enabled,
rate_limiter: AuthRateLimiter::new(args.auth_rate_limit.clone()),
rate_limit_ip_subnet: args.auth_rate_limit_ip_subnet,
ip_allowlist_check_enabled: !args.is_private_access_proxy,
is_vpc_acccess_proxy: args.is_private_access_proxy,
is_auth_broker: args.is_auth_broker,
682 proxy/src/cache/project_info.rs vendored
@@ -1,25 +1,30 @@
use std::collections::{HashMap, HashSet, hash_map};
use std::collections::HashSet;
use std::convert::Infallible;
use std::sync::Arc;
use std::sync::atomic::AtomicU64;
use std::time::Duration;

use async_trait::async_trait;
use clashmap::ClashMap;
use clashmap::mapref::one::Ref;
use rand::{Rng, thread_rng};
use smol_str::SmolStr;
use tokio::sync::Mutex;
use tokio::time::Instant;
use tracing::{debug, info};

use super::{Cache, Cached};
use crate::auth::IpPattern;
use crate::config::ProjectInfoCacheOptions;
use crate::control_plane::{EndpointAccessControl, RoleAccessControl};
use crate::control_plane::{AccessBlockerFlags, AuthSecret};
use crate::intern::{AccountIdInt, EndpointIdInt, ProjectIdInt, RoleNameInt};
use crate::types::{EndpointId, RoleName};

#[async_trait]
pub(crate) trait ProjectInfoCache {
fn invalidate_endpoint_access_for_project(&self, project_id: ProjectIdInt);
fn invalidate_endpoint_access_for_org(&self, account_id: AccountIdInt);
fn invalidate_allowed_ips_for_project(&self, project_id: ProjectIdInt);
fn invalidate_allowed_vpc_endpoint_ids_for_projects(&self, project_ids: Vec<ProjectIdInt>);
fn invalidate_allowed_vpc_endpoint_ids_for_org(&self, account_id: AccountIdInt);
fn invalidate_block_public_or_vpc_access_for_project(&self, project_id: ProjectIdInt);
fn invalidate_role_secret_for_project(&self, project_id: ProjectIdInt, role_name: RoleNameInt);
async fn decrement_active_listeners(&self);
async fn increment_active_listeners(&self);
@@ -37,10 +42,6 @@ impl<T> Entry<T> {
value,
}
}

pub(crate) fn get(&self, valid_since: Instant) -> Option<&T> {
(valid_since < self.created_at).then_some(&self.value)
}
}
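The Entry::get shown in the hunk above encodes the cache's freshness rule: an entry is served only if it was created after the cutoff valid_since = now - ttl. A self-contained sketch of that check:

    use std::time::{Duration, Instant};

    struct Entry<T> {
        created_at: Instant,
        value: T,
    }

    impl<T> Entry<T> {
        // Fresh iff the entry was created after the freshness cutoff.
        fn get(&self, valid_since: Instant) -> Option<&T> {
            (valid_since < self.created_at).then_some(&self.value)
        }
    }

    fn main() {
        let ttl = Duration::from_secs(1);
        let e = Entry { created_at: Instant::now(), value: 42 };
        let valid_since = Instant::now() - ttl;
        assert_eq!(e.get(valid_since), Some(&42)); // still within ttl
    }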
impl<T> From<T> for Entry<T> {
@@ -49,32 +50,101 @@ impl<T> From<T> for Entry<T> {
}
}

#[derive(Default)]
struct EndpointInfo {
role_controls: HashMap<RoleNameInt, Entry<RoleAccessControl>>,
controls: Option<Entry<EndpointAccessControl>>,
secret: std::collections::HashMap<RoleNameInt, Entry<Option<AuthSecret>>>,
allowed_ips: Option<Entry<Arc<Vec<IpPattern>>>>,
block_public_or_vpc_access: Option<Entry<AccessBlockerFlags>>,
allowed_vpc_endpoint_ids: Option<Entry<Arc<Vec<String>>>>,
}

impl EndpointInfo {
fn check_ignore_cache(ignore_cache_since: Option<Instant>, created_at: Instant) -> bool {
match ignore_cache_since {
None => false,
Some(t) => t < created_at,
}
}
pub(crate) fn get_role_secret(
&self,
role_name: RoleNameInt,
valid_since: Instant,
) -> Option<RoleAccessControl> {
let controls = self.role_controls.get(&role_name)?;
controls.get(valid_since).cloned()
ignore_cache_since: Option<Instant>,
) -> Option<(Option<AuthSecret>, bool)> {
if let Some(secret) = self.secret.get(&role_name) {
if valid_since < secret.created_at {
return Some((
secret.value.clone(),
Self::check_ignore_cache(ignore_cache_since, secret.created_at),
));
}
}
None
}

pub(crate) fn get_controls(&self, valid_since: Instant) -> Option<EndpointAccessControl> {
let controls = self.controls.as_ref()?;
controls.get(valid_since).cloned()
pub(crate) fn get_allowed_ips(
&self,
valid_since: Instant,
ignore_cache_since: Option<Instant>,
) -> Option<(Arc<Vec<IpPattern>>, bool)> {
if let Some(allowed_ips) = &self.allowed_ips {
if valid_since < allowed_ips.created_at {
return Some((
allowed_ips.value.clone(),
Self::check_ignore_cache(ignore_cache_since, allowed_ips.created_at),
));
}
}
None
}
pub(crate) fn get_allowed_vpc_endpoint_ids(
&self,
valid_since: Instant,
ignore_cache_since: Option<Instant>,
) -> Option<(Arc<Vec<String>>, bool)> {
if let Some(allowed_vpc_endpoint_ids) = &self.allowed_vpc_endpoint_ids {
if valid_since < allowed_vpc_endpoint_ids.created_at {
return Some((
allowed_vpc_endpoint_ids.value.clone(),
Self::check_ignore_cache(
ignore_cache_since,
allowed_vpc_endpoint_ids.created_at,
),
));
}
}
None
}
pub(crate) fn get_block_public_or_vpc_access(
&self,
valid_since: Instant,
ignore_cache_since: Option<Instant>,
) -> Option<(AccessBlockerFlags, bool)> {
if let Some(block_public_or_vpc_access) = &self.block_public_or_vpc_access {
if valid_since < block_public_or_vpc_access.created_at {
return Some((
block_public_or_vpc_access.value.clone(),
Self::check_ignore_cache(
ignore_cache_since,
block_public_or_vpc_access.created_at,
),
));
}
}
None
}

pub(crate) fn invalidate_endpoint(&mut self) {
self.controls = None;
pub(crate) fn invalidate_allowed_ips(&mut self) {
self.allowed_ips = None;
}
pub(crate) fn invalidate_allowed_vpc_endpoint_ids(&mut self) {
self.allowed_vpc_endpoint_ids = None;
}
pub(crate) fn invalidate_block_public_or_vpc_access(&mut self) {
self.block_public_or_vpc_access = None;
}

pub(crate) fn invalidate_role_secret(&mut self, role_name: RoleNameInt) {
self.role_controls.remove(&role_name);
self.secret.remove(&role_name);
}
}

@@ -100,22 +170,34 @@ pub struct ProjectInfoCacheImpl {

#[async_trait]
impl ProjectInfoCache for ProjectInfoCacheImpl {
fn invalidate_endpoint_access_for_project(&self, project_id: ProjectIdInt) {
info!("invalidating endpoint access for project `{project_id}`");
let endpoints = self
.project2ep
.get(&project_id)
.map(|kv| kv.value().clone())
.unwrap_or_default();
for endpoint_id in endpoints {
if let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) {
endpoint_info.invalidate_endpoint();
fn invalidate_allowed_vpc_endpoint_ids_for_projects(&self, project_ids: Vec<ProjectIdInt>) {
info!(
"invalidating allowed vpc endpoint ids for projects `{}`",
project_ids
.iter()
.map(|id| id.to_string())
.collect::<Vec<_>>()
.join(", ")
);
for project_id in project_ids {
let endpoints = self
.project2ep
.get(&project_id)
.map(|kv| kv.value().clone())
.unwrap_or_default();
for endpoint_id in endpoints {
if let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) {
endpoint_info.invalidate_allowed_vpc_endpoint_ids();
}
}
}
}

fn invalidate_endpoint_access_for_org(&self, account_id: AccountIdInt) {
info!("invalidating endpoint access for org `{account_id}`");
fn invalidate_allowed_vpc_endpoint_ids_for_org(&self, account_id: AccountIdInt) {
info!(
"invalidating allowed vpc endpoint ids for org `{}`",
account_id
);
let endpoints = self
.account2ep
.get(&account_id)
@@ -123,11 +205,41 @@ impl ProjectInfoCache for ProjectInfoCacheImpl {
.unwrap_or_default();
for endpoint_id in endpoints {
if let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) {
endpoint_info.invalidate_endpoint();
endpoint_info.invalidate_allowed_vpc_endpoint_ids();
}
}
}

fn invalidate_block_public_or_vpc_access_for_project(&self, project_id: ProjectIdInt) {
info!(
"invalidating block public or vpc access for project `{}`",
project_id
);
let endpoints = self
.project2ep
.get(&project_id)
.map(|kv| kv.value().clone())
.unwrap_or_default();
for endpoint_id in endpoints {
if let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) {
endpoint_info.invalidate_block_public_or_vpc_access();
}
}
}

fn invalidate_allowed_ips_for_project(&self, project_id: ProjectIdInt) {
info!("invalidating allowed ips for project `{}`", project_id);
let endpoints = self
.project2ep
.get(&project_id)
.map(|kv| kv.value().clone())
.unwrap_or_default();
for endpoint_id in endpoints {
if let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) {
endpoint_info.invalidate_allowed_ips();
}
}
}
fn invalidate_role_secret_for_project(&self, project_id: ProjectIdInt, role_name: RoleNameInt) {
info!(
"invalidating role secret for project_id `{}` and role_name `{}`",
@@ -144,7 +256,6 @@ impl ProjectInfoCache for ProjectInfoCacheImpl {
}
}
}

async fn decrement_active_listeners(&self) {
let mut listeners_guard = self.active_listeners_lock.lock().await;
if *listeners_guard == 0 {
@@ -182,72 +293,156 @@ impl ProjectInfoCacheImpl {
}
}

fn get_endpoint_cache(
&self,
endpoint_id: &EndpointId,
) -> Option<Ref<'_, EndpointIdInt, EndpointInfo>> {
let endpoint_id = EndpointIdInt::get(endpoint_id)?;
self.cache.get(&endpoint_id)
}

pub(crate) fn get_role_secret(
&self,
endpoint_id: &EndpointId,
role_name: &RoleName,
) -> Option<RoleAccessControl> {
let valid_since = self.get_cache_times();
) -> Option<Cached<&Self, Option<AuthSecret>>> {
let endpoint_id = EndpointIdInt::get(endpoint_id)?;
let role_name = RoleNameInt::get(role_name)?;
let endpoint_info = self.get_endpoint_cache(endpoint_id)?;
endpoint_info.get_role_secret(role_name, valid_since)
let (valid_since, ignore_cache_since) = self.get_cache_times();
let endpoint_info = self.cache.get(&endpoint_id)?;
let (value, ignore_cache) =
endpoint_info.get_role_secret(role_name, valid_since, ignore_cache_since)?;
if !ignore_cache {
let cached = Cached {
token: Some((
self,
CachedLookupInfo::new_role_secret(endpoint_id, role_name),
)),
value,
};
return Some(cached);
}
Some(Cached::new_uncached(value))
}

pub(crate) fn get_endpoint_access(
pub(crate) fn get_allowed_ips(
&self,
endpoint_id: &EndpointId,
) -> Option<EndpointAccessControl> {
let valid_since = self.get_cache_times();
let endpoint_info = self.get_endpoint_cache(endpoint_id)?;
endpoint_info.get_controls(valid_since)
) -> Option<Cached<&Self, Arc<Vec<IpPattern>>>> {
let endpoint_id = EndpointIdInt::get(endpoint_id)?;
let (valid_since, ignore_cache_since) = self.get_cache_times();
let endpoint_info = self.cache.get(&endpoint_id)?;
let value = endpoint_info.get_allowed_ips(valid_since, ignore_cache_since);
let (value, ignore_cache) = value?;
if !ignore_cache {
let cached = Cached {
token: Some((self, CachedLookupInfo::new_allowed_ips(endpoint_id))),
value,
};
return Some(cached);
}
Some(Cached::new_uncached(value))
}
pub(crate) fn get_allowed_vpc_endpoint_ids(
&self,
endpoint_id: &EndpointId,
) -> Option<Cached<&Self, Arc<Vec<String>>>> {
let endpoint_id = EndpointIdInt::get(endpoint_id)?;
let (valid_since, ignore_cache_since) = self.get_cache_times();
let endpoint_info = self.cache.get(&endpoint_id)?;
let value = endpoint_info.get_allowed_vpc_endpoint_ids(valid_since, ignore_cache_since);
let (value, ignore_cache) = value?;
if !ignore_cache {
let cached = Cached {
token: Some((
self,
CachedLookupInfo::new_allowed_vpc_endpoint_ids(endpoint_id),
)),
value,
};
return Some(cached);
}
Some(Cached::new_uncached(value))
}
pub(crate) fn get_block_public_or_vpc_access(
&self,
endpoint_id: &EndpointId,
) -> Option<Cached<&Self, AccessBlockerFlags>> {
let endpoint_id = EndpointIdInt::get(endpoint_id)?;
let (valid_since, ignore_cache_since) = self.get_cache_times();
let endpoint_info = self.cache.get(&endpoint_id)?;
let value = endpoint_info.get_block_public_or_vpc_access(valid_since, ignore_cache_since);
let (value, ignore_cache) = value?;
if !ignore_cache {
let cached = Cached {
token: Some((
self,
CachedLookupInfo::new_block_public_or_vpc_access(endpoint_id),
)),
value,
};
return Some(cached);
}
Some(Cached::new_uncached(value))
}

pub(crate) fn insert_endpoint_access(
pub(crate) fn insert_role_secret(
&self,
account_id: Option<AccountIdInt>,
project_id: ProjectIdInt,
endpoint_id: EndpointIdInt,
role_name: RoleNameInt,
controls: EndpointAccessControl,
role_controls: RoleAccessControl,
secret: Option<AuthSecret>,
) {
if let Some(account_id) = account_id {
self.insert_account2endpoint(account_id, endpoint_id);
}
self.insert_project2endpoint(project_id, endpoint_id);

if self.cache.len() >= self.config.size {
// If there are too many entries, wait until the next gc cycle.
return;
}

let controls = Entry::from(controls);
let role_controls = Entry::from(role_controls);

match self.cache.entry(endpoint_id) {
clashmap::Entry::Vacant(e) => {
e.insert(EndpointInfo {
role_controls: HashMap::from_iter([(role_name, role_controls)]),
controls: Some(controls),
});
}
clashmap::Entry::Occupied(mut e) => {
let ep = e.get_mut();
ep.controls = Some(controls);
if ep.role_controls.len() < self.config.max_roles {
ep.role_controls.insert(role_name, role_controls);
}
}
self.insert_project2endpoint(project_id, endpoint_id);
let mut entry = self.cache.entry(endpoint_id).or_default();
if entry.secret.len() < self.config.max_roles {
entry.secret.insert(role_name, secret.into());
}
}
pub(crate) fn insert_allowed_ips(
&self,
project_id: ProjectIdInt,
endpoint_id: EndpointIdInt,
allowed_ips: Arc<Vec<IpPattern>>,
) {
if self.cache.len() >= self.config.size {
// If there are too many entries, wait until the next gc cycle.
return;
}
self.insert_project2endpoint(project_id, endpoint_id);
self.cache.entry(endpoint_id).or_default().allowed_ips = Some(allowed_ips.into());
}
pub(crate) fn insert_allowed_vpc_endpoint_ids(
&self,
account_id: Option<AccountIdInt>,
project_id: ProjectIdInt,
endpoint_id: EndpointIdInt,
allowed_vpc_endpoint_ids: Arc<Vec<String>>,
) {
if self.cache.len() >= self.config.size {
// If there are too many entries, wait until the next gc cycle.
return;
}
if let Some(account_id) = account_id {
self.insert_account2endpoint(account_id, endpoint_id);
}
self.insert_project2endpoint(project_id, endpoint_id);
self.cache
.entry(endpoint_id)
.or_default()
.allowed_vpc_endpoint_ids = Some(allowed_vpc_endpoint_ids.into());
}
pub(crate) fn insert_block_public_or_vpc_access(
&self,
project_id: ProjectIdInt,
endpoint_id: EndpointIdInt,
access_blockers: AccessBlockerFlags,
) {
if self.cache.len() >= self.config.size {
// If there are too many entries, wait until the next gc cycle.
return;
}
self.insert_project2endpoint(project_id, endpoint_id);
self.cache
.entry(endpoint_id)
.or_default()
.block_public_or_vpc_access = Some(access_blockers.into());
}

fn insert_project2endpoint(&self, project_id: ProjectIdInt, endpoint_id: EndpointIdInt) {
if let Some(mut endpoints) = self.project2ep.get_mut(&project_id) {
@@ -257,7 +452,6 @@ impl ProjectInfoCacheImpl {
.insert(project_id, HashSet::from([endpoint_id]));
}
}

fn insert_account2endpoint(&self, account_id: AccountIdInt, endpoint_id: EndpointIdInt) {
if let Some(mut endpoints) = self.account2ep.get_mut(&account_id) {
endpoints.insert(endpoint_id);
@@ -266,57 +460,21 @@ impl ProjectInfoCacheImpl {
.insert(account_id, HashSet::from([endpoint_id]));
}
}

fn ignore_ttl_since(&self) -> Option<Instant> {
fn get_cache_times(&self) -> (Instant, Option<Instant>) {
let mut valid_since = Instant::now() - self.config.ttl;
// Only ignore cache if ttl is disabled.
let ttl_disabled_since_us = self
.ttl_disabled_since_us
.load(std::sync::atomic::Ordering::Relaxed);

if ttl_disabled_since_us == u64::MAX {
return None;
}

Some(self.start_time + Duration::from_micros(ttl_disabled_since_us))
}

fn get_cache_times(&self) -> Instant {
let mut valid_since = Instant::now() - self.config.ttl;
if let Some(ignore_ttl_since) = self.ignore_ttl_since() {
let ignore_cache_since = if ttl_disabled_since_us == u64::MAX {
None
} else {
let ignore_cache_since = self.start_time + Duration::from_micros(ttl_disabled_since_us);
// We are fine if entry is not older than ttl or was added before we are getting notifications.
valid_since = valid_since.min(ignore_ttl_since);
}
valid_since
}

pub fn maybe_invalidate_role_secret(&self, endpoint_id: &EndpointId, role_name: &RoleName) {
let Some(endpoint_id) = EndpointIdInt::get(endpoint_id) else {
return;
valid_since = valid_since.min(ignore_cache_since);
Some(ignore_cache_since)
};
let Some(role_name) = RoleNameInt::get(role_name) else {
return;
};

let Some(mut endpoint_info) = self.cache.get_mut(&endpoint_id) else {
return;
};

let entry = endpoint_info.role_controls.entry(role_name);
let hash_map::Entry::Occupied(role_controls) = entry else {
return;
};

let created_at = role_controls.get().created_at;
let expire = match self.ignore_ttl_since() {
// if ignoring TTL, we should still try and roll the password if it's old
// and we the client gave an incorrect password. There could be some lag on the redis channel.
Some(_) => created_at + self.config.ttl < Instant::now(),
// edge case: redis is down, let's be generous and invalidate the cache immediately.
None => true,
};

if expire {
role_controls.remove();
}
(valid_since, ignore_cache_since)
}
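The rewritten get_cache_times returns two cutoffs: valid_since for normal TTL expiry, and an optional ignore_cache_since marking when the TTL was disabled (the u64::MAX sentinel in the original means it never was). A simplified sketch of the same computation, with the sentinel already decoded into an Option:

    use std::time::{Duration, Instant};

    // Sketch: `ttl_disabled_since` is None while no Redis listener is active.
    fn cache_times(
        now: Instant,
        ttl: Duration,
        ttl_disabled_since: Option<Instant>,
    ) -> (Instant, Option<Instant>) {
        let mut valid_since = now - ttl;
        if let Some(disabled_since) = ttl_disabled_since {
            // Entries inserted after notifications started are trusted
            // indefinitely; older ones still age out by ttl.
            valid_since = valid_since.min(disabled_since);
        }
        (valid_since, ttl_disabled_since)
    }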
pub async fn gc_worker(&self) -> anyhow::Result<Infallible> {
@@ -351,12 +509,84 @@ impl ProjectInfoCacheImpl {
}
}

/// Lookup info for project info cache.
/// This is used to invalidate cache entries.
pub(crate) struct CachedLookupInfo {
/// Search by this key.
endpoint_id: EndpointIdInt,
lookup_type: LookupType,
}

impl CachedLookupInfo {
pub(self) fn new_role_secret(endpoint_id: EndpointIdInt, role_name: RoleNameInt) -> Self {
Self {
endpoint_id,
lookup_type: LookupType::RoleSecret(role_name),
}
}
pub(self) fn new_allowed_ips(endpoint_id: EndpointIdInt) -> Self {
Self {
endpoint_id,
lookup_type: LookupType::AllowedIps,
}
}
pub(self) fn new_allowed_vpc_endpoint_ids(endpoint_id: EndpointIdInt) -> Self {
Self {
endpoint_id,
lookup_type: LookupType::AllowedVpcEndpointIds,
}
}
pub(self) fn new_block_public_or_vpc_access(endpoint_id: EndpointIdInt) -> Self {
Self {
endpoint_id,
lookup_type: LookupType::BlockPublicOrVpcAccess,
}
}
}

enum LookupType {
RoleSecret(RoleNameInt),
AllowedIps,
AllowedVpcEndpointIds,
BlockPublicOrVpcAccess,
}

impl Cache for ProjectInfoCacheImpl {
type Key = SmolStr;
// Value is not really used here, but we need to specify it.
type Value = SmolStr;

type LookupInfo<Key> = CachedLookupInfo;

fn invalidate(&self, key: &Self::LookupInfo<SmolStr>) {
match &key.lookup_type {
LookupType::RoleSecret(role_name) => {
if let Some(mut endpoint_info) = self.cache.get_mut(&key.endpoint_id) {
endpoint_info.invalidate_role_secret(*role_name);
}
}
LookupType::AllowedIps => {
if let Some(mut endpoint_info) = self.cache.get_mut(&key.endpoint_id) {
endpoint_info.invalidate_allowed_ips();
}
}
LookupType::AllowedVpcEndpointIds => {
if let Some(mut endpoint_info) = self.cache.get_mut(&key.endpoint_id) {
endpoint_info.invalidate_allowed_vpc_endpoint_ids();
}
}
LookupType::BlockPublicOrVpcAccess => {
if let Some(mut endpoint_info) = self.cache.get_mut(&key.endpoint_id) {
endpoint_info.invalidate_block_public_or_vpc_access();
}
}
}
}
}

#[cfg(test)]
mod tests {
use std::sync::Arc;

use super::*;
use crate::control_plane::{AccessBlockerFlags, AuthSecret};
use crate::scram::ServerSecret;
use crate::types::ProjectId;

@@ -371,8 +601,6 @@ mod tests {
});
let project_id: ProjectId = "project".into();
let endpoint_id: EndpointId = "endpoint".into();
let account_id: Option<AccountIdInt> = None;

let user1: RoleName = "user1".into();
let user2: RoleName = "user2".into();
let secret1 = Some(AuthSecret::Scram(ServerSecret::mock([1; 32])));
@@ -381,73 +609,183 @@ mod tests {
"127.0.0.1".parse().unwrap(),
"127.0.0.2".parse().unwrap(),
]);

cache.insert_endpoint_access(
account_id,
cache.insert_role_secret(
(&project_id).into(),
(&endpoint_id).into(),
(&user1).into(),
EndpointAccessControl {
allowed_ips: allowed_ips.clone(),
allowed_vpce: Arc::new(vec![]),
flags: AccessBlockerFlags::default(),
},
RoleAccessControl {
secret: secret1.clone(),
},
secret1.clone(),
);

cache.insert_endpoint_access(
account_id,
cache.insert_role_secret(
(&project_id).into(),
(&endpoint_id).into(),
(&user2).into(),
EndpointAccessControl {
allowed_ips: allowed_ips.clone(),
allowed_vpce: Arc::new(vec![]),
flags: AccessBlockerFlags::default(),
},
RoleAccessControl {
secret: secret2.clone(),
},
secret2.clone(),
);
cache.insert_allowed_ips(
(&project_id).into(),
(&endpoint_id).into(),
allowed_ips.clone(),
);

let cached = cache.get_role_secret(&endpoint_id, &user1).unwrap();
assert_eq!(cached.secret, secret1);

assert!(cached.cached());
assert_eq!(cached.value, secret1);
let cached = cache.get_role_secret(&endpoint_id, &user2).unwrap();
assert_eq!(cached.secret, secret2);
assert!(cached.cached());
assert_eq!(cached.value, secret2);

// Shouldn't add more than 2 roles.
let user3: RoleName = "user3".into();
let secret3 = Some(AuthSecret::Scram(ServerSecret::mock([3; 32])));

cache.insert_endpoint_access(
account_id,
cache.insert_role_secret(
(&project_id).into(),
(&endpoint_id).into(),
(&user3).into(),
EndpointAccessControl {
allowed_ips: allowed_ips.clone(),
allowed_vpce: Arc::new(vec![]),
flags: AccessBlockerFlags::default(),
},
RoleAccessControl {
secret: secret3.clone(),
},
secret3.clone(),
);

assert!(cache.get_role_secret(&endpoint_id, &user3).is_none());

let cached = cache.get_endpoint_access(&endpoint_id).unwrap();
assert_eq!(cached.allowed_ips, allowed_ips);
let cached = cache.get_allowed_ips(&endpoint_id).unwrap();
assert!(cached.cached());
assert_eq!(cached.value, allowed_ips);

tokio::time::advance(Duration::from_secs(2)).await;
let cached = cache.get_role_secret(&endpoint_id, &user1);
assert!(cached.is_none());
let cached = cache.get_role_secret(&endpoint_id, &user2);
assert!(cached.is_none());
let cached = cache.get_endpoint_access(&endpoint_id);
let cached = cache.get_allowed_ips(&endpoint_id);
assert!(cached.is_none());
}

#[tokio::test]
async fn test_project_info_cache_invalidations() {
tokio::time::pause();
let cache = Arc::new(ProjectInfoCacheImpl::new(ProjectInfoCacheOptions {
size: 2,
max_roles: 2,
ttl: Duration::from_secs(1),
gc_interval: Duration::from_secs(600),
}));
cache.clone().increment_active_listeners().await;
tokio::time::advance(Duration::from_secs(2)).await;

let project_id: ProjectId = "project".into();
let endpoint_id: EndpointId = "endpoint".into();
let user1: RoleName = "user1".into();
let user2: RoleName = "user2".into();
let secret1 = Some(AuthSecret::Scram(ServerSecret::mock([1; 32])));
let secret2 = Some(AuthSecret::Scram(ServerSecret::mock([2; 32])));
let allowed_ips = Arc::new(vec![
"127.0.0.1".parse().unwrap(),
"127.0.0.2".parse().unwrap(),
]);
cache.insert_role_secret(
(&project_id).into(),
(&endpoint_id).into(),
(&user1).into(),
secret1.clone(),
);
cache.insert_role_secret(
(&project_id).into(),
(&endpoint_id).into(),
(&user2).into(),
secret2.clone(),
);
cache.insert_allowed_ips(
(&project_id).into(),
(&endpoint_id).into(),
allowed_ips.clone(),
);

tokio::time::advance(Duration::from_secs(2)).await;
// Nothing should be invalidated.

let cached = cache.get_role_secret(&endpoint_id, &user1).unwrap();
// TTL is disabled, so it should be impossible to invalidate this value.
assert!(!cached.cached());
assert_eq!(cached.value, secret1);

cached.invalidate(); // Shouldn't do anything.
let cached = cache.get_role_secret(&endpoint_id, &user1).unwrap();
assert_eq!(cached.value, secret1);

let cached = cache.get_role_secret(&endpoint_id, &user2).unwrap();
assert!(!cached.cached());
assert_eq!(cached.value, secret2);

// The only way to invalidate this value is to invalidate via the api.
cache.invalidate_role_secret_for_project((&project_id).into(), (&user2).into());
assert!(cache.get_role_secret(&endpoint_id, &user2).is_none());

let cached = cache.get_allowed_ips(&endpoint_id).unwrap();
assert!(!cached.cached());
assert_eq!(cached.value, allowed_ips);
}

#[tokio::test]
async fn test_increment_active_listeners_invalidate_added_before() {
tokio::time::pause();
let cache = Arc::new(ProjectInfoCacheImpl::new(ProjectInfoCacheOptions {
size: 2,
max_roles: 2,
ttl: Duration::from_secs(1),
gc_interval: Duration::from_secs(600),
}));

let project_id: ProjectId = "project".into();
let endpoint_id: EndpointId = "endpoint".into();
let user1: RoleName = "user1".into();
let user2: RoleName = "user2".into();
let secret1 = Some(AuthSecret::Scram(ServerSecret::mock([1; 32])));
let secret2 = Some(AuthSecret::Scram(ServerSecret::mock([2; 32])));
let allowed_ips = Arc::new(vec![
"127.0.0.1".parse().unwrap(),
"127.0.0.2".parse().unwrap(),
]);
cache.insert_role_secret(
(&project_id).into(),
(&endpoint_id).into(),
(&user1).into(),
secret1.clone(),
);
cache.clone().increment_active_listeners().await;
tokio::time::advance(Duration::from_millis(100)).await;
cache.insert_role_secret(
(&project_id).into(),
(&endpoint_id).into(),
(&user2).into(),
secret2.clone(),
);

// Added before ttl was disabled + ttl should be still cached.
let cached = cache.get_role_secret(&endpoint_id, &user1).unwrap();
assert!(cached.cached());
let cached = cache.get_role_secret(&endpoint_id, &user2).unwrap();
assert!(cached.cached());

tokio::time::advance(Duration::from_secs(1)).await;
// Added before ttl was disabled + ttl should expire.
assert!(cache.get_role_secret(&endpoint_id, &user1).is_none());
assert!(cache.get_role_secret(&endpoint_id, &user2).is_none());

// Added after ttl was disabled + ttl should not be cached.
cache.insert_allowed_ips(
(&project_id).into(),
(&endpoint_id).into(),
allowed_ips.clone(),
);
let cached = cache.get_allowed_ips(&endpoint_id).unwrap();
assert!(!cached.cached());

tokio::time::advance(Duration::from_secs(1)).await;
// Added before ttl was disabled + ttl still should expire.
assert!(cache.get_role_secret(&endpoint_id, &user1).is_none());
assert!(cache.get_role_secret(&endpoint_id, &user2).is_none());
// Shouldn't be invalidated.

let cached = cache.get_allowed_ips(&endpoint_id).unwrap();
assert!(!cached.cached());
assert_eq!(cached.value, allowed_ips);
}
}
@@ -5,6 +5,7 @@ use anyhow::{Context, anyhow};
use ipnet::{IpNet, Ipv4Net, Ipv6Net};
use postgres_client::CancelToken;
use postgres_client::tls::MakeTlsConnect;
use pq_proto::CancelKeyData;
use redis::{Cmd, FromRedisValue, Value};
use serde::{Deserialize, Serialize};
use thiserror::Error;
@@ -12,15 +13,15 @@ use tokio::net::TcpStream;
use tokio::sync::{mpsc, oneshot};
use tracing::{debug, error, info, warn};

use crate::auth::AuthError;
use crate::auth::backend::ComputeUserInfo;
use crate::auth::{AuthError, check_peer_addr_is_in_list};
use crate::config::ComputeConfig;
use crate::context::RequestContext;
use crate::control_plane::ControlPlaneApi;
use crate::error::ReportableError;
use crate::ext::LockExt;
use crate::metrics::{CancelChannelSizeGuard, CancellationRequest, Metrics, RedisMsgKind};
use crate::pqproto::CancelKeyData;
use crate::protocol2::ConnectionInfoExtra;
use crate::rate_limiter::LeakyBucketRateLimiter;
use crate::redis::keys::KeyPrefix;
use crate::redis::kv_ops::RedisKVClient;
@@ -271,7 +272,13 @@ pub(crate) enum CancelError {
#[error("rate limit exceeded")]
RateLimit,

#[error("Authentication error")]
#[error("IP is not allowed")]
IpNotAllowed,

#[error("VPC endpoint id is not allowed to connect")]
VpcEndpointIdNotAllowed,

#[error("Authentication backend error")]
AuthError(#[from] AuthError),

#[error("key not found")]
@@ -290,7 +297,10 @@ impl ReportableError for CancelError {
}
CancelError::Postgres(_) => crate::error::ErrorKind::Compute,
CancelError::RateLimit => crate::error::ErrorKind::RateLimit,
CancelError::NotFound | CancelError::AuthError(_) => crate::error::ErrorKind::User,
CancelError::IpNotAllowed
| CancelError::VpcEndpointIdNotAllowed
| CancelError::NotFound => crate::error::ErrorKind::User,
CancelError::AuthError(_) => crate::error::ErrorKind::ControlPlane,
CancelError::InternalError => crate::error::ErrorKind::Service,
}
}
@@ -313,7 +323,7 @@ impl CancellationHandler {
}
}

pub(crate) fn get_key(self: &Arc<Self>) -> Session {
pub(crate) fn get_key(self: Arc<Self>) -> Session {
// we intentionally generate a random "backend pid" and "secret key" here.
// we use the corresponding u64 as an identifier for the
// actual endpoint+pid+secret for postgres/pgbouncer.
@@ -330,7 +340,7 @@ impl CancellationHandler {
Session {
key,
redis_key,
cancellation_handler: Arc::clone(self),
cancellation_handler: self,
}
}
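The receiver change from `self: &Arc<Self>` to `self: Arc<Self>` lets get_key move the caller's reference count straight into the returned Session instead of cloning a borrowed Arc. A minimal illustration of the receiver style (types here are illustrative):

    use std::sync::Arc;

    struct Handler;
    struct Session {
        handler: Arc<Handler>,
    }

    impl Handler {
        // Taking `self: Arc<Self>` by value moves the caller's refcount
        // into the Session rather than bumping it with Arc::clone.
        fn get_key(self: Arc<Self>) -> Session {
            Session { handler: self }
        }
    }

    fn main() {
        let h = Arc::new(Handler);
        let _session = h.get_key();
    }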
@@ -412,13 +422,7 @@ impl CancellationHandler {
IpAddr::V4(ip) => IpNet::V4(Ipv4Net::new_assert(ip, 24).trunc()), // use defaut mask here
IpAddr::V6(ip) => IpNet::V6(Ipv6Net::new_assert(ip, 64).trunc()),
};

let allowed = {
let rate_limit_config = None;
let limiter = self.limiter.lock_propagate_poison();
limiter.check(subnet_key, rate_limit_config, 1)
};
if !allowed {
if !self.limiter.lock_propagate_poison().check(subnet_key, 1) {
// log only the subnet part of the IP address to know which subnet is rate limited
tracing::warn!("Rate limit exceeded. Skipping cancellation message, {subnet_key}");
Metrics::get()
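Rate limiting here is keyed by subnet rather than by individual address: a v4 peer is truncated to its /24 and a v6 peer to its /64 before the bucket lookup, so one misbehaving host can't dodge the limit by rotating addresses within its subnet. A standalone demonstration with the ipnet crate, mirroring the truncation in the hunk above:

    use std::net::IpAddr;

    use ipnet::{IpNet, Ipv4Net, Ipv6Net};

    // Derive the rate-limit key: zero the host bits of the peer address.
    fn subnet_key(addr: IpAddr) -> IpNet {
        match addr {
            IpAddr::V4(ip) => IpNet::V4(Ipv4Net::new_assert(ip, 24).trunc()),
            IpAddr::V6(ip) => IpNet::V6(Ipv6Net::new_assert(ip, 64).trunc()),
        }
    }

    fn main() {
        let key = subnet_key("203.0.113.77".parse().unwrap());
        assert_eq!(key.to_string(), "203.0.113.0/24");
    }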
@@ -446,13 +450,52 @@ impl CancellationHandler {
return Err(CancelError::NotFound);
};

let info = &cancel_closure.user_info;
let access_controls = auth_backend
.get_endpoint_access_control(&ctx, &info.endpoint, &info.user)
if check_ip_allowed {
let ip_allowlist = auth_backend
.get_allowed_ips(&ctx, &cancel_closure.user_info)
.await
.map_err(|e| CancelError::AuthError(e.into()))?;

if !check_peer_addr_is_in_list(&ctx.peer_addr(), &ip_allowlist) {
// log it here since cancel_session could be spawned in a task
tracing::warn!(
"IP is not allowed to cancel the query: {key}, address: {}",
ctx.peer_addr()
);
return Err(CancelError::IpNotAllowed);
}
}

// check if a VPC endpoint ID is coming in and if yes, if it's allowed
let access_blocks = auth_backend
.get_block_public_or_vpc_access(&ctx, &cancel_closure.user_info)
.await
.map_err(|e| CancelError::AuthError(e.into()))?;

access_controls.check(&ctx, check_ip_allowed, check_vpc_allowed)?;
if check_vpc_allowed {
if access_blocks.vpc_access_blocked {
return Err(CancelError::AuthError(AuthError::NetworkNotAllowed));
}

let incoming_vpc_endpoint_id = match ctx.extra() {
None => return Err(CancelError::AuthError(AuthError::MissingVPCEndpointId)),
Some(ConnectionInfoExtra::Aws { vpce_id }) => vpce_id.to_string(),
Some(ConnectionInfoExtra::Azure { link_id }) => link_id.to_string(),
};

let allowed_vpc_endpoint_ids = auth_backend
.get_allowed_vpc_endpoint_ids(&ctx, &cancel_closure.user_info)
.await
.map_err(|e| CancelError::AuthError(e.into()))?;
// TODO: For now an empty VPC endpoint ID list means all are allowed. We should replace that.
if !allowed_vpc_endpoint_ids.is_empty()
&& !allowed_vpc_endpoint_ids.contains(&incoming_vpc_endpoint_id)
{
return Err(CancelError::VpcEndpointIdNotAllowed);
}
} else if access_blocks.public_access_blocked {
return Err(CancelError::VpcEndpointIdNotAllowed);
}

Metrics::get()
.proxy
@@ -8,6 +8,7 @@ use itertools::Itertools;
use postgres_client::tls::MakeTlsConnect;
use postgres_client::{CancelToken, RawConnection};
use postgres_protocol::message::backend::NoticeResponseBody;
use pq_proto::StartupMessageParams;
use rustls::pki_types::InvalidDnsNameError;
use thiserror::Error;
use tokio::net::{TcpStream, lookup_host};
@@ -23,7 +24,6 @@ use crate::control_plane::errors::WakeComputeError;
use crate::control_plane::messages::MetricsAuxInfo;
use crate::error::{ReportableError, UserFacingError};
use crate::metrics::{Metrics, NumDbConnectionsGuard};
use crate::pqproto::StartupMessageParams;
use crate::proxy::neon_option;
use crate::tls::postgres_rustls::MakeRustlsConnect;
use crate::types::Host;
@@ -7,6 +7,7 @@ use arc_swap::ArcSwapOption;
use clap::ValueEnum;
use remote_storage::RemoteStorageConfig;

use crate::auth::backend::AuthRateLimiter;
use crate::auth::backend::jwt::JwkCache;
use crate::control_plane::locks::ApiLocks;
use crate::rate_limiter::{RateBucketInfo, RateLimitAlgorithm, RateLimiterConfig};
@@ -64,6 +65,9 @@ pub struct HttpConfig {
pub struct AuthenticationConfig {
pub thread_pool: Arc<ThreadPool>,
pub scram_protocol_timeout: tokio::time::Duration,
pub rate_limiter_enabled: bool,
pub rate_limiter: AuthRateLimiter,
pub rate_limit_ip_subnet: u8,
pub ip_allowlist_check_enabled: bool,
pub is_vpc_acccess_proxy: bool,
pub jwks_cache: JwkCache,
@@ -1,8 +1,9 @@
use std::sync::Arc;

use futures::{FutureExt, TryFutureExt};
use tokio::io::{AsyncRead, AsyncWrite};
use futures::TryFutureExt;
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
use tokio_util::sync::CancellationToken;
use tokio_util::task::task_tracker::TaskTrackerToken;
use tracing::{Instrument, debug, error, info};

use crate::auth::backend::ConsoleRedirectBackend;
@@ -14,10 +15,8 @@ use crate::metrics::{Metrics, NumClientConnectionsGuard};
use crate::protocol2::{ConnectHeader, ConnectionInfo, read_proxy_protocol};
use crate::proxy::connect_compute::{TcpMechanism, connect_to_compute};
use crate::proxy::handshake::{HandshakeData, handshake};
use crate::proxy::passthrough::ProxyPassthrough;
use crate::proxy::{
ClientRequestError, ErrorSource, prepare_client_connection, run_until_cancelled,
};
use crate::proxy::passthrough::passthrough;
use crate::proxy::{ClientRequestError, prepare_client_connection, run_until_cancelled};

pub async fn task_main(
config: &'static ProxyConfig,
@@ -35,7 +34,6 @@ pub async fn task_main(
socket2::SockRef::from(&listener).set_keepalive(true)?;

let connections = tokio_util::task::task_tracker::TaskTracker::new();
let cancellations = tokio_util::task::task_tracker::TaskTracker::new();

while let Some(accept_result) =
run_until_cancelled(listener.accept(), &cancellation_token).await
@@ -49,11 +47,11 @@ pub async fn task_main(

let session_id = uuid::Uuid::new_v4();
let cancellation_handler = Arc::clone(&cancellation_handler);
let cancellations = cancellations.clone();

debug!(protocol = "tcp", %session_id, "accepted new TCP connection");

connections.spawn(async move {
let tracker = connections.token();
tokio::spawn(async move {
let (socket, peer_addr) = match read_proxy_protocol(socket).await {
Err(e) => {
error!("per-client task finished with an error: {e:#}");
@@ -103,99 +101,80 @@ pub async fn task_main(
&config.region,
);

let span = ctx.span();
let mut slot = Some(ctx);
let res = handle_client(
config,
backend,
&ctx,
&mut slot,
cancellation_handler,
socket,
conn_gauge,
cancellations,
tracker,
)
.instrument(ctx.span())
.boxed()
.instrument(span)
.await;

match res {
Err(e) => {
match (slot, res) {
(None, _) => {}
(Some(ctx), Ok(())) => {
ctx.success();
}
(Some(ctx), Err(e)) => {
ctx.set_error_kind(e.get_error_kind());
error!(parent: &ctx.span(), "per-client task finished with an error: {e:#}");
}
Ok(None) => {
ctx.set_success();
}
Ok(Some(p)) => {
ctx.set_success();
let _disconnect = ctx.log_connect();
match p.proxy_pass(&config.connect_to_compute).await {
Ok(()) => {}
Err(ErrorSource::Client(e)) => {
error!(
?session_id,
"per-client task finished with an IO error from the client: {e:#}"
);
}
Err(ErrorSource::Compute(e)) => {
error!(
?session_id,
"per-client task finished with an IO error from the compute: {e:#}"
);
}
}
tracing::warn!(parent: &ctx.span(), "per-client task finished with an error: {e:#}");
}
}
});
}

connections.close();
cancellations.close();
drop(listener);

// Drain connections
connections.wait().await;
cancellations.wait().await;

Ok(())
}

#[allow(clippy::too_many_arguments)]
pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send>(
pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send + 'static>(
config: &'static ProxyConfig,
backend: &'static ConsoleRedirectBackend,
ctx: &RequestContext,
ctx_slot: &mut Option<RequestContext>,
cancellation_handler: Arc<CancellationHandler>,
stream: S,
conn_gauge: NumClientConnectionsGuard<'static>,
cancellations: tokio_util::task::task_tracker::TaskTracker,
) -> Result<Option<ProxyPassthrough<S>>, ClientRequestError> {
debug!(
protocol = %ctx.protocol(),
"handling interactive connection from client"
);
tracker: TaskTrackerToken,
) -> Result<(), ClientRequestError> {
let protocol = ctx_slot.as_ref().expect("context must be set").protocol();
debug!(%protocol, "handling interactive connection from client");

let metrics = &Metrics::get().proxy;
let proto = ctx.protocol();
let request_gauge = metrics.connection_requests.guard(proto);
let request_gauge = metrics.connection_requests.guard(protocol);

let tls = config.tls_config.load();
let tls = tls.as_deref();

let record_handshake_error = !ctx.has_private_peer_addr();
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
let do_handshake = handshake(ctx, stream, tls, record_handshake_error);
let data = {
let ctx = ctx_slot.as_ref().expect("context must be set");
let record_handshake_error = !ctx.has_private_peer_addr();
let _pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
let do_handshake = handshake(ctx, stream, tracker, tls, record_handshake_error);
tokio::time::timeout(config.handshake_timeout, do_handshake).await??
};

let (mut stream, params) = match tokio::time::timeout(config.handshake_timeout, do_handshake)
.await??
{
let (mut stream, params) = match data {
HandshakeData::Startup(stream, params) => (stream, params),
HandshakeData::Cancel(cancel_key_data) => {
HandshakeData::Cancel(cancel_key_data, tracker) => {
// spawn a task to cancel the session, but don't wait for it
cancellations.spawn({
let cancellation_handler_clone = Arc::clone(&cancellation_handler);
let ctx = ctx.clone();
tokio::spawn({
let cancellation_handler_clone = Arc::clone(&cancellation_handler);
let ctx = ctx_slot.take().expect("context must be set");
let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, "cancel_session", session_id = ?ctx.session_id());
cancel_span.follows_from(tracing::Span::current());
async move {
let _tracker = tracker;
cancellation_handler_clone
.cancel_session(
cancel_key_data,
@@ -205,15 +184,17 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send>(
backend.get_api(),
)
.await
.inspect_err(|e | debug!(error = ?e, "cancel_session failed")).ok();
}.instrument(cancel_span)
.inspect_err(|e| debug!(error = ?e, "cancel_session failed"))
.ok();
}
.instrument(cancel_span)
});

return Ok(None);
return Ok(());
}
};
drop(pause);

let ctx = ctx_slot.as_ref().expect("context must be set");
ctx.set_db_options(params.clone());

let (node_info, user_info, _ip_allowlist) = match backend
@@ -221,22 +202,24 @@
.await
{
Ok(auth_result) => auth_result,
Err(e) => Err(stream.throw_error(e, Some(ctx)).await)?,
Err(e) => {
return stream.throw_error(e, Some(ctx)).await?;
}
};

let node = connect_to_compute(
let mut node = connect_to_compute(
ctx,
&TcpMechanism {
TcpMechanism {
user_info,
params_compat: true,
params: &params,
locks: &config.connect_compute_locks,
},
&node_info,
node_info,
config.wake_compute_retry_config,
&config.connect_to_compute,
)
.or_else(|e| async { Err(stream.throw_error(e, Some(ctx)).await) })
.or_else(|e| stream.throw_error(e, Some(ctx)))
.await?;

let cancellation_handler_clone = Arc::clone(&cancellation_handler);
@@ -244,17 +227,28 @@

session.write_cancel_key(node.cancel_closure.clone())?;

prepare_client_connection(&node, *session.key(), &mut stream);
let stream = stream.flush_and_into_inner().await?;
prepare_client_connection(&node, *session.key(), &mut stream).await?;

Ok(Some(ProxyPassthrough {
client: stream,
aux: node.aux.clone(),
private_link_id: None,
compute: node,
session_id: ctx.session_id(),
cancel: session,
_req: request_gauge,
_conn: conn_gauge,
}))
// Before proxy passing, forward to compute whatever data is left in the
// PqStream input buffer. Normally there is none, but our serverless npm
// driver in pipeline mode sends startup, password and first query
// immediately after opening the connection.
let (stream, read_buf, tracker) = stream.into_inner();
node.stream.write_all(&read_buf).await?;

let ctx = ctx_slot.take().expect("context must be set");
ctx.set_success();

tokio::spawn(passthrough(
ctx,
&config.connect_to_compute,
stream,
node,
session,
request_gauge,
conn_gauge,
tracker,
));

Ok(())
}
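A minimal sketch of the buffered-bytes handoff described in the comment inside `handle_client` above: whatever the client pipelined behind the handshake is flushed to the compute stream before the generic copy loop takes over (the function and its types here are illustrative, not part of the diff).

use tokio::io::{AsyncWrite, AsyncWriteExt};

async fn forward_leftover<W: AsyncWrite + Unpin>(
    compute: &mut W,
    read_buf: &[u8],
) -> std::io::Result<()> {
    if !read_buf.is_empty() {
        // don't drop frames a pipelining client already sent
        compute.write_all(read_buf).await?;
    }
    Ok(())
}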
@@ -4,6 +4,7 @@ use std::net::IpAddr;

use chrono::Utc;
use once_cell::sync::OnceCell;
use pq_proto::StartupMessageParams;
use smol_str::SmolStr;
use tokio::sync::mpsc;
use tracing::field::display;
@@ -19,7 +20,6 @@ use crate::metrics::{
ConnectOutcome, InvalidEndpointsGroup, LatencyAccumulated, LatencyTimer, Metrics, Protocol,
Waiting,
};
use crate::pqproto::StartupMessageParams;
use crate::protocol2::{ConnectionInfo, ConnectionInfoExtra};
use crate::types::{DbName, EndpointId, RoleName};

@@ -38,7 +38,7 @@ pub struct RequestContext(
/// I would typically use a RefCell but that would break the `Send` requirements
/// so we need something with thread-safety. `TryLock` is a cheap alternative
/// that offers similar semantics to a `RefCell` but with synchronisation.
TryLock<RequestContextInner>,
TryLock<Box<RequestContextInner>>,
);

struct RequestContextInner {
@@ -89,7 +89,7 @@ pub(crate) enum AuthMethod {
impl Clone for RequestContext {
fn clone(&self) -> Self {
let inner = self.0.try_lock().expect("should not deadlock");
let new = RequestContextInner {
let new = Box::new(RequestContextInner {
conn_info: inner.conn_info.clone(),
session_id: inner.session_id,
protocol: inner.protocol,
@@ -117,7 +117,7 @@ impl Clone for RequestContext {
disconnect_sender: None,
latency_timer: LatencyTimer::noop(inner.protocol),
disconnect_timestamp: inner.disconnect_timestamp,
};
});

Self(TryLock::new(new))
}
@@ -140,7 +140,7 @@ impl RequestContext {
role = tracing::field::Empty,
);

let inner = RequestContextInner {
let inner = Box::new(RequestContextInner {
conn_info,
session_id,
protocol,
@@ -168,7 +168,7 @@ impl RequestContext {
disconnect_sender: LOG_CHAN_DISCONNECT.get().and_then(|tx| tx.upgrade()),
latency_timer: LatencyTimer::new(protocol),
disconnect_timestamp: None,
};
});

Self(TryLock::new(inner))
}
@@ -370,18 +370,6 @@ impl RequestContext {
}
}

pub(crate) fn latency_timer_pause_at(
&self,
at: tokio::time::Instant,
waiting_for: Waiting,
) -> LatencyTimerPause<'_> {
LatencyTimerPause {
ctx: self,
start: at,
waiting_for,
}
}

pub(crate) fn get_proxy_latency(&self) -> LatencyAccumulated {
self.0
.try_lock()
@@ -534,7 +522,7 @@ impl Drop for RequestContextInner {
}
}

pub struct DisconnectLogger(RequestContextInner);
pub struct DisconnectLogger(Box<RequestContextInner>);

impl Drop for DisconnectLogger {
fn drop(&mut self) {
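A sketch of the boxing pattern adopted above, assuming only what the doc comment states: the inner state sits behind a lock for `Send`, and boxing it means constructing or cloning the handle moves one pointer instead of the whole inner struct (a `Mutex` stands in for `TryLock` here; names are illustrative).

use std::sync::Mutex;

struct Inner {
    big: [u64; 32], // placeholder for the many RequestContextInner fields
}

struct Ctx(Mutex<Box<Inner>>);

impl Clone for Ctx {
    fn clone(&self) -> Self {
        let inner = self.0.lock().expect("should not deadlock");
        // only the freshly boxed copy moves; the lock slot holds a thin pointer
        Ctx(Mutex::new(Box::new(Inner { big: inner.big })))
    }
}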
@@ -11,6 +11,7 @@ use parquet::file::metadata::RowGroupMetaDataPtr;
use parquet::file::properties::{DEFAULT_PAGE_SIZE, WriterProperties, WriterPropertiesPtr};
use parquet::file::writer::SerializedFileWriter;
use parquet::record::RecordWriter;
use pq_proto::StartupMessageParams;
use remote_storage::{GenericRemoteStorage, RemotePath, RemoteStorageConfig, TimeoutOrCancel};
use serde::ser::SerializeMap;
use tokio::sync::mpsc;
@@ -23,7 +24,6 @@ use super::{LOG_CHAN, RequestContextInner};
use crate::config::remote_storage_from_toml;
use crate::context::LOG_CHAN_DISCONNECT;
use crate::ext::TaskExt;
use crate::pqproto::StartupMessageParams;

#[derive(clap::Args, Clone, Debug)]
pub struct ParquetUploadArgs {
@@ -7,9 +7,7 @@ use std::time::Duration;

use ::http::HeaderName;
use ::http::header::AUTHORIZATION;
use bytes::Bytes;
use futures::TryFutureExt;
use hyper::StatusCode;
use postgres_client::config::SslMode;
use tokio::time::Instant;
use tracing::{Instrument, debug, info, info_span, warn};
@@ -17,6 +15,7 @@ use tracing::{Instrument, debug, info, info_span, warn};
use super::super::messages::{ControlPlaneErrorMessage, GetEndpointAccessControl, WakeCompute};
use crate::auth::backend::ComputeUserInfo;
use crate::auth::backend::jwt::AuthRule;
use crate::cache::Cached;
use crate::context::RequestContext;
use crate::control_plane::caches::ApiCaches;
use crate::control_plane::errors::{
@@ -25,12 +24,12 @@ use crate::control_plane::errors::{
use crate::control_plane::locks::ApiLocks;
use crate::control_plane::messages::{ColdStartInfo, EndpointJwksResponse, Reason};
use crate::control_plane::{
AccessBlockerFlags, AuthInfo, AuthSecret, CachedNodeInfo, EndpointAccessControl, NodeInfo,
RoleAccessControl,
AccessBlockerFlags, AuthInfo, AuthSecret, CachedAccessBlockerFlags, CachedAllowedIps,
CachedAllowedVpcEndpointIds, CachedNodeInfo, CachedRoleSecret, NodeInfo,
};
use crate::metrics::Metrics;
use crate::metrics::{CacheOutcome, Metrics};
use crate::rate_limiter::WakeComputeRateLimiter;
use crate::types::{EndpointCacheKey, EndpointId, RoleName};
use crate::types::{EndpointCacheKey, EndpointId};
use crate::{compute, http, scram};

pub(crate) const X_REQUEST_ID: HeaderName = HeaderName::from_static("x-request-id");
@@ -67,41 +66,66 @@ impl NeonControlPlaneClient {
self.endpoint.url().as_str()
}

async fn do_get_auth_req(
async fn do_get_auth_info(
&self,
ctx: &RequestContext,
endpoint: &EndpointId,
role: &RoleName,
user_info: &ComputeUserInfo,
) -> Result<AuthInfo, GetAuthInfoError> {
if !self
.caches
.endpoints_cache
.is_valid(ctx, &user_info.endpoint.normalize())
{
// TODO: refactor this because it's weird
// this is a failure to authenticate but we return Ok.
info!("endpoint is not valid, skipping the request");
return Ok(AuthInfo::default());
}
self.do_get_auth_req(user_info, &ctx.session_id(), Some(ctx))
.await
}

async fn do_get_auth_req(
&self,
user_info: &ComputeUserInfo,
session_id: &uuid::Uuid,
ctx: Option<&RequestContext>,
) -> Result<AuthInfo, GetAuthInfoError> {
let request_id: String = session_id.to_string();
let application_name = if let Some(ctx) = ctx {
ctx.console_application_name()
} else {
"auth_cancellation".to_string()
};

async {
let response = {
let request = self
.endpoint
.get_path("get_endpoint_access_control")
.header(X_REQUEST_ID, ctx.session_id().to_string())
.header(AUTHORIZATION, format!("Bearer {}", &self.jwt))
.query(&[("session_id", ctx.session_id())])
.query(&[
("application_name", ctx.console_application_name().as_str()),
("endpointish", endpoint.as_str()),
("role", role.as_str()),
])
.build()?;
let request = self
.endpoint
.get_path("get_endpoint_access_control")
.header(X_REQUEST_ID, &request_id)
.header(AUTHORIZATION, format!("Bearer {}", &self.jwt))
.query(&[("session_id", session_id)])
.query(&[
("application_name", application_name.as_str()),
("endpointish", user_info.endpoint.as_str()),
("role", user_info.user.as_str()),
])
.build()?;

debug!(url = request.url().as_str(), "sending http request");
let start = Instant::now();
let _pause = ctx.latency_timer_pause_at(start, crate::metrics::Waiting::Cplane);
let response = self.endpoint.execute(request).await?;

info!(duration = ?start.elapsed(), "received http response");

response
debug!(url = request.url().as_str(), "sending http request");
let start = Instant::now();
let response = match ctx {
Some(ctx) => {
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane);
let rsp = self.endpoint.execute(request).await;
drop(pause);
rsp?
}
None => self.endpoint.execute(request).await?,
};

let body = match parse_body::<GetEndpointAccessControl>(
response.status(),
response.bytes().await?,
) {
info!(duration = ?start.elapsed(), "received http response");
let body = match parse_body::<GetEndpointAccessControl>(response).await {
Ok(body) => body,
// Error 404 is special: it's ok not to have a secret.
// TODO(anna): retry
@@ -156,7 +180,7 @@ impl NeonControlPlaneClient {
async fn do_get_endpoint_jwks(
&self,
ctx: &RequestContext,
endpoint: &EndpointId,
endpoint: EndpointId,
) -> Result<Vec<AuthRule>, GetEndpointJwksError> {
if !self
.caches
@@ -192,10 +216,7 @@ impl NeonControlPlaneClient {
drop(pause);
info!(duration = ?start.elapsed(), "received http response");

let body = parse_body::<EndpointJwksResponse>(
response.status(),
response.bytes().await.map_err(ControlPlaneError::from)?,
)?;
let body = parse_body::<EndpointJwksResponse>(response).await?;

let rules = body
.jwks
@@ -247,7 +268,7 @@ impl NeonControlPlaneClient {
let response = self.endpoint.execute(request).await?;
drop(pause);
info!(duration = ?start.elapsed(), "received http response");
let body = parse_body::<WakeCompute>(response.status(), response.bytes().await?)?;
let body = parse_body::<WakeCompute>(response).await?;

// Unfortunately, ownership won't let us use `Option::ok_or` here.
let (host, port) = match parse_host_port(&body.address) {
@@ -292,104 +313,225 @@ impl NeonControlPlaneClient {

impl super::ControlPlaneApi for NeonControlPlaneClient {
#[tracing::instrument(skip_all)]
async fn get_role_access_control(
async fn get_role_secret(
&self,
ctx: &RequestContext,
endpoint: &EndpointId,
role: &RoleName,
) -> Result<RoleAccessControl, crate::control_plane::errors::GetAuthInfoError> {
let normalized_ep = &endpoint.normalize();
if let Some(secret) = self
user_info: &ComputeUserInfo,
) -> Result<CachedRoleSecret, GetAuthInfoError> {
let normalized_ep = &user_info.endpoint.normalize();
let user = &user_info.user;
if let Some(role_secret) = self
.caches
.project_info
.get_role_secret(normalized_ep, role)
.get_role_secret(normalized_ep, user)
{
return Ok(secret);
return Ok(role_secret);
}

if !self.caches.endpoints_cache.is_valid(ctx, normalized_ep) {
info!("endpoint is not valid, skipping the request");
return Err(GetAuthInfoError::UnknownEndpoint);
}

let auth_info = self.do_get_auth_req(ctx, endpoint, role).await?;

let control = EndpointAccessControl {
allowed_ips: Arc::new(auth_info.allowed_ips),
allowed_vpce: Arc::new(auth_info.allowed_vpc_endpoint_ids),
flags: auth_info.access_blocker_flags,
};
let role_control = RoleAccessControl {
secret: auth_info.secret,
};

let auth_info = self.do_get_auth_info(ctx, user_info).await?;
let account_id = auth_info.account_id;
if let Some(project_id) = auth_info.project_id {
let normalized_ep_int = normalized_ep.into();

self.caches.project_info.insert_endpoint_access(
auth_info.account_id,
self.caches.project_info.insert_role_secret(
project_id,
normalized_ep_int,
role.into(),
control,
role_control.clone(),
user.into(),
auth_info.secret.clone(),
);
self.caches.project_info.insert_allowed_ips(
project_id,
normalized_ep_int,
Arc::new(auth_info.allowed_ips),
);
self.caches.project_info.insert_allowed_vpc_endpoint_ids(
account_id,
project_id,
normalized_ep_int,
Arc::new(auth_info.allowed_vpc_endpoint_ids),
);
self.caches.project_info.insert_block_public_or_vpc_access(
project_id,
normalized_ep_int,
auth_info.access_blocker_flags,
);
ctx.set_project_id(project_id);
}

Ok(role_control)
// When we just got a secret, we don't need to invalidate it.
Ok(Cached::new_uncached(auth_info.secret))
}

#[tracing::instrument(skip_all)]
async fn get_endpoint_access_control(
async fn get_allowed_ips(
&self,
ctx: &RequestContext,
endpoint: &EndpointId,
role: &RoleName,
) -> Result<EndpointAccessControl, GetAuthInfoError> {
let normalized_ep = &endpoint.normalize();
if let Some(control) = self.caches.project_info.get_endpoint_access(normalized_ep) {
return Ok(control);
user_info: &ComputeUserInfo,
) -> Result<CachedAllowedIps, GetAuthInfoError> {
let normalized_ep = &user_info.endpoint.normalize();
if let Some(allowed_ips) = self.caches.project_info.get_allowed_ips(normalized_ep) {
Metrics::get()
.proxy
.allowed_ips_cache_misses // TODO SR: Should we rename this variable to something like allowed_ip_cache_stats?
.inc(CacheOutcome::Hit);
return Ok(allowed_ips);
}

if !self.caches.endpoints_cache.is_valid(ctx, normalized_ep) {
info!("endpoint is not valid, skipping the request");
return Err(GetAuthInfoError::UnknownEndpoint);
}

let auth_info = self.do_get_auth_req(ctx, endpoint, role).await?;

let control = EndpointAccessControl {
allowed_ips: Arc::new(auth_info.allowed_ips),
allowed_vpce: Arc::new(auth_info.allowed_vpc_endpoint_ids),
flags: auth_info.access_blocker_flags,
};
let role_control = RoleAccessControl {
secret: auth_info.secret,
};

Metrics::get()
.proxy
.allowed_ips_cache_misses
.inc(CacheOutcome::Miss);
let auth_info = self.do_get_auth_info(ctx, user_info).await?;
let allowed_ips = Arc::new(auth_info.allowed_ips);
let allowed_vpc_endpoint_ids = Arc::new(auth_info.allowed_vpc_endpoint_ids);
let access_blocker_flags = auth_info.access_blocker_flags;
let user = &user_info.user;
let account_id = auth_info.account_id;
if let Some(project_id) = auth_info.project_id {
let normalized_ep_int = normalized_ep.into();

self.caches.project_info.insert_endpoint_access(
auth_info.account_id,
self.caches.project_info.insert_role_secret(
project_id,
normalized_ep_int,
role.into(),
control.clone(),
role_control,
user.into(),
auth_info.secret.clone(),
);
self.caches.project_info.insert_allowed_ips(
project_id,
normalized_ep_int,
allowed_ips.clone(),
);
self.caches.project_info.insert_allowed_vpc_endpoint_ids(
account_id,
project_id,
normalized_ep_int,
allowed_vpc_endpoint_ids.clone(),
);
self.caches.project_info.insert_block_public_or_vpc_access(
project_id,
normalized_ep_int,
access_blocker_flags,
);
ctx.set_project_id(project_id);
}
Ok(Cached::new_uncached(allowed_ips))
}

Ok(control)
async fn get_allowed_vpc_endpoint_ids(
&self,
ctx: &RequestContext,
user_info: &ComputeUserInfo,
) -> Result<CachedAllowedVpcEndpointIds, GetAuthInfoError> {
let normalized_ep = &user_info.endpoint.normalize();
if let Some(allowed_vpc_endpoint_ids) = self
.caches
.project_info
.get_allowed_vpc_endpoint_ids(normalized_ep)
{
Metrics::get()
.proxy
.vpc_endpoint_id_cache_stats
.inc(CacheOutcome::Hit);
return Ok(allowed_vpc_endpoint_ids);
}

Metrics::get()
.proxy
.vpc_endpoint_id_cache_stats
.inc(CacheOutcome::Miss);

let auth_info = self.do_get_auth_info(ctx, user_info).await?;
let allowed_ips = Arc::new(auth_info.allowed_ips);
let allowed_vpc_endpoint_ids = Arc::new(auth_info.allowed_vpc_endpoint_ids);
let access_blocker_flags = auth_info.access_blocker_flags;
let user = &user_info.user;
let account_id = auth_info.account_id;
if let Some(project_id) = auth_info.project_id {
let normalized_ep_int = normalized_ep.into();
self.caches.project_info.insert_role_secret(
project_id,
normalized_ep_int,
user.into(),
auth_info.secret.clone(),
);
self.caches.project_info.insert_allowed_ips(
project_id,
normalized_ep_int,
allowed_ips.clone(),
);
self.caches.project_info.insert_allowed_vpc_endpoint_ids(
account_id,
project_id,
normalized_ep_int,
allowed_vpc_endpoint_ids.clone(),
);
self.caches.project_info.insert_block_public_or_vpc_access(
project_id,
normalized_ep_int,
access_blocker_flags,
);
ctx.set_project_id(project_id);
}
Ok(Cached::new_uncached(allowed_vpc_endpoint_ids))
}
async fn get_block_public_or_vpc_access(
&self,
ctx: &RequestContext,
user_info: &ComputeUserInfo,
) -> Result<CachedAccessBlockerFlags, GetAuthInfoError> {
let normalized_ep = &user_info.endpoint.normalize();
if let Some(access_blocker_flags) = self
.caches
.project_info
.get_block_public_or_vpc_access(normalized_ep)
{
Metrics::get()
.proxy
.access_blocker_flags_cache_stats
.inc(CacheOutcome::Hit);
return Ok(access_blocker_flags);
}

Metrics::get()
.proxy
.access_blocker_flags_cache_stats
.inc(CacheOutcome::Miss);

let auth_info = self.do_get_auth_info(ctx, user_info).await?;
let allowed_ips = Arc::new(auth_info.allowed_ips);
let allowed_vpc_endpoint_ids = Arc::new(auth_info.allowed_vpc_endpoint_ids);
let access_blocker_flags = auth_info.access_blocker_flags;
let user = &user_info.user;
let account_id = auth_info.account_id;
if let Some(project_id) = auth_info.project_id {
let normalized_ep_int = normalized_ep.into();
self.caches.project_info.insert_role_secret(
project_id,
normalized_ep_int,
user.into(),
auth_info.secret.clone(),
);
self.caches.project_info.insert_allowed_ips(
project_id,
normalized_ep_int,
allowed_ips.clone(),
);
self.caches.project_info.insert_allowed_vpc_endpoint_ids(
account_id,
project_id,
normalized_ep_int,
allowed_vpc_endpoint_ids.clone(),
);
self.caches.project_info.insert_block_public_or_vpc_access(
project_id,
normalized_ep_int,
access_blocker_flags.clone(),
);
ctx.set_project_id(project_id);
}
Ok(Cached::new_uncached(access_blocker_flags))
}

#[tracing::instrument(skip_all)]
async fn get_endpoint_jwks(
&self,
ctx: &RequestContext,
endpoint: &EndpointId,
endpoint: EndpointId,
) -> Result<Vec<AuthRule>, GetEndpointJwksError> {
self.do_get_endpoint_jwks(ctx, endpoint).await
}
@@ -498,33 +640,33 @@ impl super::ControlPlaneApi for NeonControlPlaneClient {
}

/// Parse http response body, taking status code into account.
fn parse_body<T: for<'a> serde::Deserialize<'a>>(
status: StatusCode,
body: Bytes,
async fn parse_body<T: for<'a> serde::Deserialize<'a>>(
response: http::Response,
) -> Result<T, ControlPlaneError> {
let status = response.status();
if status.is_success() {
// We shouldn't log raw body because it may contain secrets.
info!("request succeeded, processing the body");
return Ok(serde_json::from_slice(&body).map_err(std::io::Error::other)?);
return Ok(response.json().await?);
}

let s = response.bytes().await?;
// Log the plaintext to be able to detect whether there are cases not covered by the error struct.
info!("response_error plaintext: {:?}", body);
info!("response_error plaintext: {:?}", s);

// Don't throw an error here because it's not as important
// as the fact that the request itself has failed.
let mut body = serde_json::from_slice(&body).unwrap_or_else(|e| {
let mut body = serde_json::from_slice(&s).unwrap_or_else(|e| {
warn!("failed to parse error body: {e}");
Box::new(ControlPlaneErrorMessage {
ControlPlaneErrorMessage {
error: "reason unclear (malformed error message)".into(),
http_status_code: status,
status: None,
})
}
});
body.http_status_code = status;

warn!("console responded with an error ({status}): {body:?}");
Err(ControlPlaneError::Message(body))
Err(ControlPlaneError::Message(Box::new(body)))
}

fn parse_host_port(input: &str) -> Option<(&str, u16)> {
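The body of `parse_host_port` is not part of this hunk; a hypothetical sketch with the same signature would split on the last ':' so only the port is parsed numerically (the real implementation may handle more cases, e.g. IPv6 literals):

fn parse_host_port(input: &str) -> Option<(&str, u16)> {
    let (host, port) = input.rsplit_once(':')?;
    Some((host, port.parse().ok()?))
}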
@@ -15,14 +15,14 @@ use crate::auth::backend::ComputeUserInfo;
use crate::auth::backend::jwt::AuthRule;
use crate::cache::Cached;
use crate::context::RequestContext;
use crate::control_plane::client::{
CachedAllowedIps, CachedAllowedVpcEndpointIds, CachedRoleSecret,
};
use crate::control_plane::errors::{
ControlPlaneError, GetAuthInfoError, GetEndpointJwksError, WakeComputeError,
};
use crate::control_plane::messages::MetricsAuxInfo;
use crate::control_plane::{
AccessBlockerFlags, AuthInfo, AuthSecret, CachedNodeInfo, EndpointAccessControl, NodeInfo,
RoleAccessControl,
};
use crate::control_plane::{AccessBlockerFlags, AuthInfo, AuthSecret, CachedNodeInfo, NodeInfo};
use crate::intern::RoleNameInt;
use crate::types::{BranchId, EndpointId, ProjectId, RoleName};
use crate::url::ApiUrl;
@@ -66,8 +66,7 @@ impl MockControlPlane {

async fn do_get_auth_info(
&self,
endpoint: &EndpointId,
role: &RoleName,
user_info: &ComputeUserInfo,
) -> Result<AuthInfo, GetAuthInfoError> {
let (secret, allowed_ips) = async {
// Perhaps we could persist this connection, but then we'd have to
@@ -81,7 +80,7 @@ impl MockControlPlane {
let secret = if let Some(entry) = get_execute_postgres_query(
&client,
"select rolpassword from pg_catalog.pg_authid where rolname = $1",
&[&role.as_str()],
&[&&*user_info.user],
"rolpassword",
)
.await?
@@ -90,7 +89,7 @@ impl MockControlPlane {
let secret = scram::ServerSecret::parse(&entry).map(AuthSecret::Scram);
secret.or_else(|| parse_md5(&entry).map(AuthSecret::Md5))
} else {
warn!("user '{role}' does not exist");
warn!("user '{}' does not exist", user_info.user);
None
};

@@ -98,7 +97,7 @@ impl MockControlPlane {
match get_execute_postgres_query(
&client,
"select allowed_ips from neon_control_plane.endpoints where endpoint_id = $1",
&[&endpoint.as_str()],
&[&user_info.endpoint.as_str()],
"allowed_ips",
)
.await?
@@ -134,7 +133,7 @@ impl MockControlPlane {

async fn do_get_endpoint_jwks(
&self,
endpoint: &EndpointId,
endpoint: EndpointId,
) -> Result<Vec<AuthRule>, GetEndpointJwksError> {
let (client, connection) =
tokio_postgres::connect(self.endpoint.as_str(), tokio_postgres::NoTls).await?;
@@ -223,36 +222,53 @@ async fn get_execute_postgres_query(
}

impl super::ControlPlaneApi for MockControlPlane {
async fn get_endpoint_access_control(
#[tracing::instrument(skip_all)]
async fn get_role_secret(
&self,
_ctx: &RequestContext,
endpoint: &EndpointId,
role: &RoleName,
) -> Result<EndpointAccessControl, GetAuthInfoError> {
let info = self.do_get_auth_info(endpoint, role).await?;
Ok(EndpointAccessControl {
allowed_ips: Arc::new(info.allowed_ips),
allowed_vpce: Arc::new(info.allowed_vpc_endpoint_ids),
flags: info.access_blocker_flags,
})
user_info: &ComputeUserInfo,
) -> Result<CachedRoleSecret, GetAuthInfoError> {
Ok(CachedRoleSecret::new_uncached(
self.do_get_auth_info(user_info).await?.secret,
))
}

async fn get_role_access_control(
async fn get_allowed_ips(
&self,
_ctx: &RequestContext,
endpoint: &EndpointId,
role: &RoleName,
) -> Result<RoleAccessControl, GetAuthInfoError> {
let info = self.do_get_auth_info(endpoint, role).await?;
Ok(RoleAccessControl {
secret: info.secret,
})
user_info: &ComputeUserInfo,
) -> Result<CachedAllowedIps, GetAuthInfoError> {
Ok(Cached::new_uncached(Arc::new(
self.do_get_auth_info(user_info).await?.allowed_ips,
)))
}

async fn get_allowed_vpc_endpoint_ids(
&self,
_ctx: &RequestContext,
user_info: &ComputeUserInfo,
) -> Result<CachedAllowedVpcEndpointIds, super::errors::GetAuthInfoError> {
Ok(Cached::new_uncached(Arc::new(
self.do_get_auth_info(user_info)
.await?
.allowed_vpc_endpoint_ids,
)))
}

async fn get_block_public_or_vpc_access(
&self,
_ctx: &RequestContext,
user_info: &ComputeUserInfo,
) -> Result<super::CachedAccessBlockerFlags, super::errors::GetAuthInfoError> {
Ok(Cached::new_uncached(
self.do_get_auth_info(user_info).await?.access_blocker_flags,
))
}

async fn get_endpoint_jwks(
&self,
_ctx: &RequestContext,
endpoint: &EndpointId,
endpoint: EndpointId,
) -> Result<Vec<AuthRule>, GetEndpointJwksError> {
self.do_get_endpoint_jwks(endpoint).await
}
@@ -16,14 +16,15 @@ use crate::cache::endpoints::EndpointsCache;
use crate::cache::project_info::ProjectInfoCacheImpl;
use crate::config::{CacheOptions, EndpointCacheConfig, ProjectInfoCacheOptions};
use crate::context::RequestContext;
use crate::control_plane::{CachedNodeInfo, ControlPlaneApi, NodeInfoCache, errors};
use crate::control_plane::{
CachedAccessBlockerFlags, CachedAllowedIps, CachedAllowedVpcEndpointIds, CachedNodeInfo,
CachedRoleSecret, ControlPlaneApi, NodeInfoCache, errors,
};
use crate::error::ReportableError;
use crate::metrics::ApiLockMetrics;
use crate::rate_limiter::{DynamicLimiter, Outcome, RateLimiterConfig, Token};
use crate::types::EndpointId;

use super::{EndpointAccessControl, RoleAccessControl};

#[non_exhaustive]
#[derive(Clone)]
pub enum ControlPlaneClient {
@@ -39,42 +40,68 @@ pub enum ControlPlaneClient {
}

impl ControlPlaneApi for ControlPlaneClient {
async fn get_role_access_control(
async fn get_role_secret(
&self,
ctx: &RequestContext,
endpoint: &EndpointId,
role: &crate::types::RoleName,
) -> Result<RoleAccessControl, errors::GetAuthInfoError> {
user_info: &ComputeUserInfo,
) -> Result<CachedRoleSecret, errors::GetAuthInfoError> {
match self {
Self::ProxyV1(api) => api.get_role_access_control(ctx, endpoint, role).await,
Self::ProxyV1(api) => api.get_role_secret(ctx, user_info).await,
#[cfg(any(test, feature = "testing"))]
Self::PostgresMock(api) => api.get_role_access_control(ctx, endpoint, role).await,
Self::PostgresMock(api) => api.get_role_secret(ctx, user_info).await,
#[cfg(test)]
Self::Test(_api) => {
Self::Test(_) => {
unreachable!("this function should never be called in the test backend")
}
}
}

async fn get_endpoint_access_control(
async fn get_allowed_ips(
&self,
ctx: &RequestContext,
endpoint: &EndpointId,
role: &crate::types::RoleName,
) -> Result<EndpointAccessControl, errors::GetAuthInfoError> {
user_info: &ComputeUserInfo,
) -> Result<CachedAllowedIps, errors::GetAuthInfoError> {
match self {
Self::ProxyV1(api) => api.get_endpoint_access_control(ctx, endpoint, role).await,
Self::ProxyV1(api) => api.get_allowed_ips(ctx, user_info).await,
#[cfg(any(test, feature = "testing"))]
Self::PostgresMock(api) => api.get_endpoint_access_control(ctx, endpoint, role).await,
Self::PostgresMock(api) => api.get_allowed_ips(ctx, user_info).await,
#[cfg(test)]
Self::Test(api) => api.get_access_control(),
Self::Test(api) => api.get_allowed_ips(),
}
}

async fn get_allowed_vpc_endpoint_ids(
&self,
ctx: &RequestContext,
user_info: &ComputeUserInfo,
) -> Result<CachedAllowedVpcEndpointIds, errors::GetAuthInfoError> {
match self {
Self::ProxyV1(api) => api.get_allowed_vpc_endpoint_ids(ctx, user_info).await,
#[cfg(any(test, feature = "testing"))]
Self::PostgresMock(api) => api.get_allowed_vpc_endpoint_ids(ctx, user_info).await,
#[cfg(test)]
Self::Test(api) => api.get_allowed_vpc_endpoint_ids(),
}
}

async fn get_block_public_or_vpc_access(
&self,
ctx: &RequestContext,
user_info: &ComputeUserInfo,
) -> Result<CachedAccessBlockerFlags, errors::GetAuthInfoError> {
match self {
Self::ProxyV1(api) => api.get_block_public_or_vpc_access(ctx, user_info).await,
#[cfg(any(test, feature = "testing"))]
Self::PostgresMock(api) => api.get_block_public_or_vpc_access(ctx, user_info).await,
#[cfg(test)]
Self::Test(api) => api.get_block_public_or_vpc_access(),
}
}

async fn get_endpoint_jwks(
&self,
ctx: &RequestContext,
endpoint: &EndpointId,
endpoint: EndpointId,
) -> Result<Vec<AuthRule>, errors::GetEndpointJwksError> {
match self {
Self::ProxyV1(api) => api.get_endpoint_jwks(ctx, endpoint).await,
@@ -104,7 +131,15 @@ impl ControlPlaneApi for ControlPlaneClient {
pub(crate) trait TestControlPlaneClient: Send + Sync + 'static {
fn wake_compute(&self) -> Result<CachedNodeInfo, errors::WakeComputeError>;

fn get_access_control(&self) -> Result<EndpointAccessControl, errors::GetAuthInfoError>;
fn get_allowed_ips(&self) -> Result<CachedAllowedIps, errors::GetAuthInfoError>;

fn get_allowed_vpc_endpoint_ids(
&self,
) -> Result<CachedAllowedVpcEndpointIds, errors::GetAuthInfoError>;

fn get_block_public_or_vpc_access(
&self,
) -> Result<CachedAccessBlockerFlags, errors::GetAuthInfoError>;

fn dyn_clone(&self) -> Box<dyn TestControlPlaneClient>;
}
@@ -274,7 +309,7 @@ impl FetchAuthRules for ControlPlaneClient {
ctx: &RequestContext,
endpoint: EndpointId,
) -> Result<Vec<AuthRule>, FetchAuthRulesError> {
self.get_endpoint_jwks(ctx, &endpoint)
self.get_endpoint_jwks(ctx, endpoint)
.await
.map_err(FetchAuthRulesError::GetEndpointJwks)
}
@@ -99,10 +99,6 @@ pub(crate) enum GetAuthInfoError {

#[error(transparent)]
ApiError(ControlPlaneError),

/// Proxy does not know about the endpoint in advance
#[error("endpoint not found in endpoint cache")]
UnknownEndpoint,
}

// This allows more useful interactions than `#[from]`.
@@ -119,8 +115,6 @@ impl UserFacingError for GetAuthInfoError {
Self::BadSecret => REQUEST_FAILED.to_owned(),
// However, API might return a meaningful error.
Self::ApiError(e) => e.to_string_client(),
// pretend like control plane returned an error.
Self::UnknownEndpoint => REQUEST_FAILED.to_owned(),
}
}
}
@@ -130,8 +124,6 @@ impl ReportableError for GetAuthInfoError {
match self {
Self::BadSecret => crate::error::ErrorKind::ControlPlane,
Self::ApiError(_) => crate::error::ErrorKind::ControlPlane,
// we only apply endpoint filtering if control plane is under high load.
Self::UnknownEndpoint => crate::error::ErrorKind::ServiceRateLimit,
}
}
}
@@ -11,16 +11,16 @@ pub(crate) mod errors;

use std::sync::Arc;

use crate::auth::IpPattern;
use crate::auth::backend::jwt::AuthRule;
use crate::auth::backend::{ComputeCredentialKeys, ComputeUserInfo};
use crate::auth::{AuthError, IpPattern, check_peer_addr_is_in_list};
use crate::cache::project_info::ProjectInfoCacheImpl;
use crate::cache::{Cached, TimedLru};
use crate::config::ComputeConfig;
use crate::context::RequestContext;
use crate::control_plane::messages::{ControlPlaneErrorMessage, MetricsAuxInfo};
use crate::intern::{AccountIdInt, ProjectIdInt};
use crate::protocol2::ConnectionInfoExtra;
use crate::types::{EndpointCacheKey, EndpointId, RoleName};
use crate::types::{EndpointCacheKey, EndpointId};
use crate::{compute, scram};

/// Various cache-related types.
@@ -101,7 +101,7 @@ impl NodeInfo {
}
}

#[derive(Copy, Clone, Default)]
#[derive(Clone, Default, Eq, PartialEq, Debug)]
pub(crate) struct AccessBlockerFlags {
pub public_access_blocked: bool,
pub vpc_access_blocked: bool,
@@ -110,78 +110,47 @@ pub(crate) struct AccessBlockerFlags {
pub(crate) type NodeInfoCache =
TimedLru<EndpointCacheKey, Result<NodeInfo, Box<ControlPlaneErrorMessage>>>;
pub(crate) type CachedNodeInfo = Cached<&'static NodeInfoCache, NodeInfo>;

#[derive(Clone)]
pub struct RoleAccessControl {
pub secret: Option<AuthSecret>,
}

#[derive(Clone)]
pub struct EndpointAccessControl {
pub allowed_ips: Arc<Vec<IpPattern>>,
pub allowed_vpce: Arc<Vec<String>>,
pub flags: AccessBlockerFlags,
}

impl EndpointAccessControl {
pub fn check(
&self,
ctx: &RequestContext,
check_ip_allowed: bool,
check_vpc_allowed: bool,
) -> Result<(), AuthError> {
if check_ip_allowed && !check_peer_addr_is_in_list(&ctx.peer_addr(), &self.allowed_ips) {
return Err(AuthError::IpAddressNotAllowed(ctx.peer_addr()));
}

// check if a VPC endpoint ID is coming in and if yes, if it's allowed
if check_vpc_allowed {
if self.flags.vpc_access_blocked {
return Err(AuthError::NetworkNotAllowed);
}

let incoming_vpc_endpoint_id = match ctx.extra() {
None => return Err(AuthError::MissingVPCEndpointId),
Some(ConnectionInfoExtra::Aws { vpce_id }) => vpce_id.to_string(),
Some(ConnectionInfoExtra::Azure { link_id }) => link_id.to_string(),
};

let vpce = &self.allowed_vpce;
// TODO: For now an empty VPC endpoint ID list means all are allowed. We should replace that.
if !vpce.is_empty() && !vpce.contains(&incoming_vpc_endpoint_id) {
return Err(AuthError::vpc_endpoint_id_not_allowed(
incoming_vpc_endpoint_id,
));
}
} else if self.flags.public_access_blocked {
return Err(AuthError::NetworkNotAllowed);
}

Ok(())
}
}
pub(crate) type CachedRoleSecret = Cached<&'static ProjectInfoCacheImpl, Option<AuthSecret>>;
pub(crate) type CachedAllowedIps = Cached<&'static ProjectInfoCacheImpl, Arc<Vec<IpPattern>>>;
pub(crate) type CachedAllowedVpcEndpointIds =
Cached<&'static ProjectInfoCacheImpl, Arc<Vec<String>>>;
pub(crate) type CachedAccessBlockerFlags =
Cached<&'static ProjectInfoCacheImpl, AccessBlockerFlags>;
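A hedged sketch of what these aliases imply `Cached` is: a value plus an optional handle back to the cache it came from, where `new_uncached` (used throughout this diff) yields a value with no cache to invalidate. Field names are assumptions, not the crate's real definition.

pub struct Cached<C, V> {
    cache: Option<C>, // None for values that never need invalidation
    pub value: V,
}

impl<C, V> Cached<C, V> {
    pub fn new_uncached(value: V) -> Self {
        Self { cache: None, value }
    }
}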
/// This will allocate on each call, but the http requests alone
/// already require a few allocations, so it should be fine.
pub(crate) trait ControlPlaneApi {
async fn get_role_access_control(
/// Get the client's auth secret for authentication.
/// Returns an `Option` because the user-not-found situation is special.
/// We still have to mock the scram to avoid leaking information that the user doesn't exist.
async fn get_role_secret(
&self,
ctx: &RequestContext,
endpoint: &EndpointId,
role: &RoleName,
) -> Result<RoleAccessControl, errors::GetAuthInfoError>;
user_info: &ComputeUserInfo,
) -> Result<CachedRoleSecret, errors::GetAuthInfoError>;

async fn get_endpoint_access_control(
async fn get_allowed_ips(
&self,
ctx: &RequestContext,
endpoint: &EndpointId,
role: &RoleName,
) -> Result<EndpointAccessControl, errors::GetAuthInfoError>;
user_info: &ComputeUserInfo,
) -> Result<CachedAllowedIps, errors::GetAuthInfoError>;

async fn get_allowed_vpc_endpoint_ids(
&self,
ctx: &RequestContext,
user_info: &ComputeUserInfo,
) -> Result<CachedAllowedVpcEndpointIds, errors::GetAuthInfoError>;

async fn get_block_public_or_vpc_access(
&self,
ctx: &RequestContext,
user_info: &ComputeUserInfo,
) -> Result<CachedAccessBlockerFlags, errors::GetAuthInfoError>;

async fn get_endpoint_jwks(
&self,
ctx: &RequestContext,
endpoint: &EndpointId,
endpoint: EndpointId,
) -> Result<Vec<AuthRule>, errors::GetEndpointJwksError>;

/// Wake up the compute node and return the corresponding connection info.
@@ -4,10 +4,9 @@

pub mod health_server;

use std::time::{Duration, Instant};
use std::time::Duration;

use bytes::Bytes;
use futures::FutureExt;
use http::Method;
use http_body_util::BodyExt;
use hyper::body::Body;
@@ -110,31 +109,15 @@ impl Endpoint {
}

/// Execute a [request](reqwest::Request).
pub(crate) fn execute(
&self,
request: Request,
) -> impl Future<Output = Result<Response, Error>> {
let metric = Metrics::get()
pub(crate) async fn execute(&self, request: Request) -> Result<Response, Error> {
let _timer = Metrics::get()
.proxy
.console_request_latency
.with_labels(ConsoleRequest {
.start_timer(ConsoleRequest {
request: request.url().path(),
});

let req = self.client.execute(request).boxed();

async move {
let start = Instant::now();
scopeguard::defer!({
Metrics::get()
.proxy
.console_request_latency
.get_metric(metric)
.observe_duration_since(start);
});

req.await
}
self.client.execute(request).await
}
}
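The rewritten `execute` leans on the drop-based timing that `start_timer` hands back; a minimal sketch of that guard pattern (names illustrative): the observation happens in `Drop`, so every return path, including `?`, is measured without `scopeguard`.

use std::time::{Duration, Instant};

struct TimerGuard {
    start: Instant,
    observe: fn(Duration),
}

impl Drop for TimerGuard {
    fn drop(&mut self) {
        // runs on every exit path of the enclosing scope
        (self.observe)(self.start.elapsed());
    }
}

fn start_timer(observe: fn(Duration)) -> TimerGuard {
    TimerGuard { start: Instant::now(), observe }
}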
@@ -92,7 +92,6 @@ mod logging;
mod metrics;
mod parse;
mod pglb;
mod pqproto;
mod protocol2;
mod proxy;
mod rate_limiter;
@@ -1,693 +0,0 @@
//! Postgres protocol codec
//!
//! <https://www.postgresql.org/docs/current/protocol-message-formats.html>

use std::fmt;
use std::io::{self, Cursor};

use bytes::{Buf, BufMut};
use itertools::Itertools;
use rand::distributions::{Distribution, Standard};
use tokio::io::{AsyncRead, AsyncReadExt};
use zerocopy::{FromBytes, Immutable, IntoBytes, big_endian};

pub type ErrorCode = [u8; 5];

pub const FE_PASSWORD_MESSAGE: u8 = b'p';

pub const SQLSTATE_INTERNAL_ERROR: [u8; 5] = *b"XX000";

/// The protocol version number.
///
/// The most significant 16 bits are the major version number (3 for the protocol described here).
/// The least significant 16 bits are the minor version number (0 for the protocol described here).
/// <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-STARTUPMESSAGE>
#[derive(Clone, Copy, PartialEq, PartialOrd, FromBytes, IntoBytes, Immutable)]
#[repr(C)]
pub struct ProtocolVersion {
major: big_endian::U16,
minor: big_endian::U16,
}

impl ProtocolVersion {
pub const fn new(major: u16, minor: u16) -> Self {
Self {
major: big_endian::U16::new(major),
minor: big_endian::U16::new(minor),
}
}
pub const fn minor(self) -> u16 {
self.minor.get()
}
pub const fn major(self) -> u16 {
self.major.get()
}
}

impl fmt::Debug for ProtocolVersion {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_list()
.entry(&self.major())
.entry(&self.minor())
.finish()
}
}
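// Editor's sketch (not part of the original file): the same version number
// viewed as one big-endian u32, major in the high 16 bits. The constants
// below line up with CANCEL_REQUEST_CODE = (1234, 5678) further down.
const fn pack(major: u16, minor: u16) -> u32 {
    ((major as u32) << 16) | minor as u32
}
const _: () = assert!(pack(3, 0) == 0x0003_0000);
const _: () = assert!(pack(1234, 5678) == 0x04D2_162E);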
/// read the type from the stream using zerocopy.
///
/// not cancel safe.
macro_rules! read {
($s:expr => $t:ty) => {{
// cannot be implemented as a function due to lack of const-generic-expr
let mut buf = [0; size_of::<$t>()];
$s.read_exact(&mut buf).await?;
let res: $t = zerocopy::transmute!(buf);
res
}};
}

pub async fn read_startup<S>(stream: &mut S) -> io::Result<FeStartupPacket>
where
S: AsyncRead + Unpin,
{
/// <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/include/libpq/pqcomm.h#L118>
const MAX_STARTUP_PACKET_LENGTH: usize = 10000;
const RESERVED_INVALID_MAJOR_VERSION: u16 = 1234;
/// <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/include/libpq/pqcomm.h#L132>
const CANCEL_REQUEST_CODE: ProtocolVersion = ProtocolVersion::new(1234, 5678);
/// <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/include/libpq/pqcomm.h#L166>
const NEGOTIATE_SSL_CODE: ProtocolVersion = ProtocolVersion::new(1234, 5679);
/// <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/include/libpq/pqcomm.h#L167>
const NEGOTIATE_GSS_CODE: ProtocolVersion = ProtocolVersion::new(1234, 5680);

/// This first reads the startup message header, which is 8 bytes.
/// The first 4 bytes are a big-endian message length, and the next 4 bytes are a version number.
///
/// The length value is inclusive of the header. For example,
/// an empty message will always have length 8.
#[derive(Clone, Copy, FromBytes, IntoBytes, Immutable)]
#[repr(C)]
struct StartupHeader {
len: big_endian::U32,
version: ProtocolVersion,
}
let header = read!(stream => StartupHeader);
|
||||
|
||||
// <https://github.com/postgres/postgres/blob/04bcf9e19a4261fe9c7df37c777592c2e10c32a7/src/backend/tcop/backend_startup.c#L378-L382>
|
||||
// First byte indicates standard SSL handshake message
|
||||
// (It can't be a Postgres startup length because in network byte order
|
||||
// that would be a startup packet hundreds of megabytes long)
|
||||
if header.as_bytes()[0] == 0x16 {
|
||||
return Ok(FeStartupPacket::SslRequest {
|
||||
// The bytes we read for the header are actually part of a TLS ClientHello.
|
||||
// In theory, if the ClientHello was < 8 bytes we would fail with EOF before we get here.
|
||||
// In practice though, I see no world where a ClientHello is less than 8 bytes
|
||||
// since it includes ephemeral keys etc.
|
||||
direct: Some(zerocopy::transmute!(header)),
|
||||
});
|
||||
}
|
||||
|
||||
let Some(len) = (header.len.get() as usize).checked_sub(8) else {
|
||||
return Err(io::Error::other(format!(
|
||||
"invalid startup message length {}, must be at least 8.",
|
||||
header.len,
|
||||
)));
|
||||
};
|
||||
|
||||
// TODO: add a histogram for startup packet lengths
|
||||
if len > MAX_STARTUP_PACKET_LENGTH {
|
||||
tracing::warn!("large startup message detected: {len} bytes");
|
||||
return Err(io::Error::other(format!(
|
||||
"invalid startup message length {len}"
|
||||
)));
|
||||
}
|
||||
|
||||
match header.version {
|
||||
// <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-CANCELREQUEST>
|
||||
CANCEL_REQUEST_CODE => {
|
||||
if len != 8 {
|
||||
return Err(io::Error::other(
|
||||
"CancelRequest message is malformed, backend PID / secret key missing",
|
||||
));
|
||||
}
|
||||
|
||||
Ok(FeStartupPacket::CancelRequest(
|
||||
read!(stream => CancelKeyData),
|
||||
))
|
||||
}
|
||||
// <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-SSLREQUEST>
|
||||
NEGOTIATE_SSL_CODE => {
|
||||
// Requested upgrade to SSL (aka TLS)
|
||||
Ok(FeStartupPacket::SslRequest { direct: None })
|
||||
}
|
||||
NEGOTIATE_GSS_CODE => {
|
||||
// Requested upgrade to GSSAPI
|
||||
Ok(FeStartupPacket::GssEncRequest)
|
||||
}
|
||||
version if version.major() == RESERVED_INVALID_MAJOR_VERSION => Err(io::Error::other(
|
||||
format!("Unrecognized request code {version:?}"),
|
||||
)),
|
||||
// StartupMessage
|
||||
version => {
|
||||
// The protocol version number is followed by one or more pairs of parameter name and value strings.
|
||||
// A zero byte is required as a terminator after the last name/value pair.
|
||||
// Parameters can appear in any order. user is required, others are optional.
|
||||
|
||||
let mut buf = vec![0; len];
|
||||
stream.read_exact(&mut buf).await?;
|
||||
|
||||
if buf.pop() != Some(b'\0') {
|
||||
return Err(io::Error::other(
|
||||
"StartupMessage params: missing null terminator",
|
||||
));
|
||||
}
|
||||
|
||||
// TODO: Don't do this.
|
||||
// There's no guarantee that these messages are utf8,
|
||||
// but they usually happen to be simple ascii.
|
||||
let params = String::from_utf8(buf)
|
||||
.map_err(|_| io::Error::other("StartupMessage params: invalid utf-8"))?;
|
||||
|
||||
Ok(FeStartupPacket::StartupMessage {
|
||||
version,
|
||||
params: StartupMessageParams { params },
|
||||
})
|
||||
}
|
||||
}
|
||||
}
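
To make the cancel path concrete (editor's sketch, inside an async context; the key value is invented): a CancelRequest is a 16-byte packet, 8 bytes of header plus the 8-byte opaque key, so `len` is exactly 8 once the header has been subtracted.

    let mut pkt = Vec::new();
    pkt.extend_from_slice(&16u32.to_be_bytes()); // total length, header included
    pkt.extend_from_slice(ProtocolVersion::new(1234, 5678).as_bytes()); // CANCEL_REQUEST_CODE
    pkt.extend_from_slice(&0xDEAD_BEEF_u64.to_be_bytes()); // hypothetical opaque cancel key
    let startup = read_startup(&mut std::io::Cursor::new(&pkt)).await?;
    assert!(matches!(startup, FeStartupPacket::CancelRequest(_)));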

/// Read a raw postgres packet, which will respect the max length requested.
///
/// This returns the message tag, as well as the message body. The message
/// body is written into `buf`, and it is otherwise completely overwritten.
///
/// This is not cancel safe.
pub async fn read_message<'a, S>(
    stream: &mut S,
    buf: &'a mut Vec<u8>,
    max: u32,
) -> io::Result<(u8, &'a mut [u8])>
where
    S: AsyncRead + Unpin,
{
    /// This first reads the header, which for regular messages in the 3.0 protocol is 5 bytes.
    /// The first byte is a message tag, and the next 4 bytes is a big-endian length.
    ///
    /// Awkwardly, the length value is inclusive of itself, but not of the tag. For example,
    /// an empty message will always have length 4.
    #[derive(Clone, Copy, FromBytes)]
    #[repr(C)]
    struct Header {
        tag: u8,
        len: big_endian::U32,
    }

    let header = read!(stream => Header);

    // as described above, the length must be at least 4.
    let Some(len) = header.len.get().checked_sub(4) else {
        return Err(io::Error::other(format!(
            "invalid startup message length {}, must be at least 4.",
            header.len,
        )));
    };

    // TODO: add a histogram for message lengths

    // check if the message exceeds our desired max.
    if len > max {
        tracing::warn!("large postgres message detected: {len} bytes");
        return Err(io::Error::other(format!("invalid message length {len}")));
    }

    // read in our entire message.
    buf.resize(len as usize, 0);
    stream.read_exact(buf).await?;

    Ok((header.tag, buf))
}
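
The framing comment above is easiest to see in raw bytes (editor's note; this mirrors the read_message_success test at the bottom of the file):

    // "SELECT 1" as a simple-query frame: tag b'Q', then length 12
    // (4 for the length field itself + 8 body bytes), then the body.
    let frame = b"Q\x00\x00\x00\x0cSELECT 1";
    let mut buf = vec![];
    let (tag, body) = read_message(&mut std::io::Cursor::new(&frame[..]), &mut buf, 100).await?;
    assert_eq!(tag, b'Q');
    assert_eq!(body, b"SELECT 1");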

pub struct WriteBuf(Cursor<Vec<u8>>);

impl Buf for WriteBuf {
    #[inline]
    fn remaining(&self) -> usize {
        self.0.remaining()
    }

    #[inline]
    fn chunk(&self) -> &[u8] {
        self.0.chunk()
    }

    #[inline]
    fn advance(&mut self, cnt: usize) {
        self.0.advance(cnt);
    }
}

impl WriteBuf {
    pub const fn new() -> Self {
        Self(Cursor::new(Vec::new()))
    }

    /// Use a heuristic to determine if we should shrink the write buffer.
    #[inline]
    fn should_shrink(&self) -> bool {
        let n = self.0.position() as usize;
        let len = self.0.get_ref().len();

        // the unused space at the front of our buffer is larger than the filled portion.
        n + n > len
    }

    /// Shrink the write buffer so that subsequent writes have more spare capacity.
    #[cold]
    fn shrink(&mut self) {
        let n = self.0.position() as usize;
        let buf = self.0.get_mut();

        // buf repr:
        // [----unused------|-----filled-----|-----uninit-----]
        //                  ^ n              ^ buf.len()      ^ buf.capacity()
        let filled = n..buf.len();
        let filled_len = filled.len();
        buf.copy_within(filled, 0);
        buf.truncate(filled_len);
        self.0.set_position(0);
    }

    /// clear the write buffer.
    pub fn reset(&mut self) {
        let buf = self.0.get_mut();
        buf.clear();
        self.0.set_position(0);
    }

    /// Write a raw message to the internal buffer.
    ///
    /// The size_hint value is only a hint for reserving space. It's ok if it's incorrect, since
    /// we calculate the length after the fact.
    pub fn write_raw(&mut self, size_hint: usize, tag: u8, f: impl FnOnce(&mut Vec<u8>)) {
        if self.should_shrink() {
            self.shrink();
        }

        let buf = self.0.get_mut();
        buf.reserve(5 + size_hint);

        buf.push(tag);
        let start = buf.len();
        buf.extend_from_slice(&[0, 0, 0, 0]);

        f(buf);

        let end = buf.len();
        let len = (end - start) as u32;
        buf[start..start + 4].copy_from_slice(&len.to_be_bytes());
    }

    /// Write an encryption response message.
    pub fn encryption(&mut self, m: u8) {
        self.0.get_mut().push(m);
    }

    pub fn write_error(&mut self, msg: &str, error_code: ErrorCode) {
        self.shrink();

        // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-ERRORRESPONSE>
        // <https://www.postgresql.org/docs/current/protocol-error-fields.html>
        // "SERROR\0CXXXXX\0M\0\0".len() == 17
        self.write_raw(17 + msg.len(), b'E', |buf| {
            // Severity: ERROR
            buf.put_slice(b"SERROR\0");

            // Code: error_code
            buf.put_u8(b'C');
            buf.put_slice(&error_code);
            buf.put_u8(0);

            // Message: msg
            buf.put_u8(b'M');
            buf.put_slice(msg.as_bytes());
            buf.put_u8(0);

            // End.
            buf.put_u8(0);
        });
    }
}
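
A worked example of the length fix-up in write_raw (editor's sketch; "TimeZone"/"UTC" are made-up values):

    let mut out = WriteBuf::new();
    out.write_raw(13, b'S', |buf| {
        buf.put_slice(b"TimeZone\0UTC\0"); // 13 payload bytes
    });
    // The frame is tag b'S' followed by a big-endian length of 17:
    // 4 bytes for the length field itself + 13 payload bytes.
    assert_eq!(&out.chunk()[..5], b"S\x00\x00\x00\x11");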

#[derive(Debug)]
pub enum FeStartupPacket {
    CancelRequest(CancelKeyData),
    SslRequest {
        direct: Option<[u8; 8]>,
    },
    GssEncRequest,
    StartupMessage {
        version: ProtocolVersion,
        params: StartupMessageParams,
    },
}

#[derive(Debug, Clone, Default)]
pub struct StartupMessageParams {
    pub params: String,
}

impl StartupMessageParams {
    /// Get parameter's value by its name.
    pub fn get(&self, name: &str) -> Option<&str> {
        self.iter().find_map(|(k, v)| (k == name).then_some(v))
    }

    /// Split command-line options according to PostgreSQL's logic,
    /// taking into account all escape sequences but leaving them as-is.
    /// [`None`] means that there's no `options` in [`Self`].
    pub fn options_raw(&self) -> Option<impl Iterator<Item = &str>> {
        self.get("options").map(Self::parse_options_raw)
    }

    /// Split command-line options according to PostgreSQL's logic,
    /// taking into account all escape sequences but leaving them as-is.
    pub fn parse_options_raw(input: &str) -> impl Iterator<Item = &str> {
        // See `postgres: pg_split_opts`.
        let mut last_was_escape = false;
        input
            .split(move |c: char| {
                // We split by non-escaped whitespace symbols.
                let should_split = c.is_ascii_whitespace() && !last_was_escape;
                last_was_escape = c == '\\' && !last_was_escape;
                should_split
            })
            .filter(|s| !s.is_empty())
    }

    /// Iterate through key-value pairs in an arbitrary order.
    pub fn iter(&self) -> impl Iterator<Item = (&str, &str)> {
        self.params.split_terminator('\0').tuples()
    }

    // This function is mostly useful in tests.
    #[cfg(test)]
    pub fn new<'a, const N: usize>(pairs: [(&'a str, &'a str); N]) -> Self {
        let mut b = Self {
            params: String::new(),
        };
        for (k, v) in pairs {
            b.insert(k, v);
        }
        b
    }

    /// Set parameter's value by its name.
    /// name and value must not contain a \0 byte
    pub fn insert(&mut self, name: &str, value: &str) {
        self.params.reserve(name.len() + value.len() + 2);
        self.params.push_str(name);
        self.params.push('\0');
        self.params.push_str(value);
        self.params.push('\0');
    }
}
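
How the flat string representation behaves end to end (editor's sketch; the parameter values are invented):

    let mut p = StartupMessageParams::default();
    p.insert("user", "alice");
    p.insert("options", "-c search_path=public\\ api");
    // Internally: "user\0alice\0options\0-c search_path=public\\ api\0"
    assert_eq!(p.get("user"), Some("alice"));
    // The escaped space does not split, and the backslash is kept as-is.
    let opts: Vec<&str> = p.options_raw().unwrap().collect();
    assert_eq!(opts, ["-c", "search_path=public\\ api"]);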

/// Cancel keys are usually represented as PID+SecretKey, but to the proxy they're just
/// opaque bytes.
#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy, FromBytes, IntoBytes, Immutable)]
pub struct CancelKeyData(pub big_endian::U64);

pub fn id_to_cancel_key(id: u64) -> CancelKeyData {
    CancelKeyData(big_endian::U64::new(id))
}

impl fmt::Display for CancelKeyData {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let id = self.0;
        f.debug_tuple("CancelKeyData")
            .field(&format_args!("{id:x}"))
            .finish()
    }
}
impl Distribution<CancelKeyData> for Standard {
    fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> CancelKeyData {
        id_to_cancel_key(rng.r#gen())
    }
}
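
For instance (editor's note; the id is arbitrary), a key renders via the Display impl above as lowercase hex:

    let key = id_to_cancel_key(0xCAFE);
    assert_eq!(key.to_string(), "CancelKeyData(cafe)");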

pub enum BeMessage<'a> {
    AuthenticationOk,
    AuthenticationSasl(BeAuthenticationSaslMessage<'a>),
    AuthenticationCleartextPassword,
    BackendKeyData(CancelKeyData),
    ParameterStatus {
        name: &'a [u8],
        value: &'a [u8],
    },
    ReadyForQuery,
    NoticeResponse(&'a str),
    NegotiateProtocolVersion {
        version: ProtocolVersion,
        options: &'a [&'a str],
    },
}

#[derive(Debug)]
pub enum BeAuthenticationSaslMessage<'a> {
    Methods(&'a [&'a str]),
    Continue(&'a [u8]),
    Final(&'a [u8]),
}

impl BeMessage<'_> {
    /// Write the message into an internal buffer
    pub fn write_message(self, buf: &mut WriteBuf) {
        match self {
            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-AUTHENTICATIONOK>
            BeMessage::AuthenticationOk => {
                buf.write_raw(1, b'R', |buf| buf.put_i32(0));
            }
            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-AUTHENTICATIONCLEARTEXTPASSWORD>
            BeMessage::AuthenticationCleartextPassword => {
                buf.write_raw(1, b'R', |buf| buf.put_i32(3));
            }

            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-AUTHENTICATIONSASL>
            BeMessage::AuthenticationSasl(BeAuthenticationSaslMessage::Methods(methods)) => {
                let len: usize = methods.iter().map(|m| m.len() + 1).sum();
                buf.write_raw(len + 2, b'R', |buf| {
                    buf.put_i32(10); // Specifies that SASL auth method is used.
                    for method in methods {
                        buf.put_slice(method.as_bytes());
                        buf.put_u8(0);
                    }
                    buf.put_u8(0); // zero terminator for the list
                });
            }
            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-AUTHENTICATIONSASL>
            BeMessage::AuthenticationSasl(BeAuthenticationSaslMessage::Continue(extra)) => {
                buf.write_raw(extra.len() + 1, b'R', |buf| {
                    buf.put_i32(11); // Continue SASL auth.
                    buf.put_slice(extra);
                });
            }
            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-AUTHENTICATIONSASL>
            BeMessage::AuthenticationSasl(BeAuthenticationSaslMessage::Final(extra)) => {
                buf.write_raw(extra.len() + 1, b'R', |buf| {
                    buf.put_i32(12); // Send final SASL message.
                    buf.put_slice(extra);
                });
            }

            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-BACKENDKEYDATA>
            BeMessage::BackendKeyData(key_data) => {
                buf.write_raw(8, b'K', |buf| buf.put_slice(key_data.as_bytes()));
            }

            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-NOTICERESPONSE>
            // <https://www.postgresql.org/docs/current/protocol-error-fields.html>
            BeMessage::NoticeResponse(msg) => {
                // 'N' signals NoticeResponse messages
                buf.write_raw(18 + msg.len(), b'N', |buf| {
                    // Severity: NOTICE
                    buf.put_slice(b"SNOTICE\0");

                    // Code: XX000 (ignored for notice, but still required)
                    buf.put_slice(b"CXX000\0");

                    // Message: msg
                    buf.put_u8(b'M');
                    buf.put_slice(msg.as_bytes());
                    buf.put_u8(0);

                    // End notice.
                    buf.put_u8(0);
                });
            }

            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-PARAMETERSTATUS>
            BeMessage::ParameterStatus { name, value } => {
                buf.write_raw(name.len() + value.len() + 2, b'S', |buf| {
                    buf.put_slice(name);
                    buf.put_u8(0);
                    buf.put_slice(value);
                    buf.put_u8(0);
                });
            }

            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-READYFORQUERY>
            BeMessage::ReadyForQuery => {
                buf.write_raw(1, b'Z', |buf| buf.put_u8(b'I'));
            }

            // <https://www.postgresql.org/docs/current/protocol-message-formats.html#PROTOCOL-MESSAGE-FORMATS-NEGOTIATEPROTOCOLVERSION>
            BeMessage::NegotiateProtocolVersion { version, options } => {
                let len: usize = options.iter().map(|o| o.len() + 1).sum();
                buf.write_raw(8 + len, b'v', |buf| {
                    buf.put_slice(version.as_bytes());
                    buf.put_u32(options.len() as u32);
                    for option in options {
                        buf.put_slice(option.as_bytes());
                        buf.put_u8(0);
                    }
                });
            }
        }
    }
}
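
The simplest arm, spelled out in bytes (editor's check against the code above):

    // AuthenticationOk is tag b'R' plus length 8: the 4-byte length field and an i32 zero.
    let mut out = WriteBuf::new();
    BeMessage::AuthenticationOk.write_message(&mut out);
    assert_eq!(out.chunk(), b"R\x00\x00\x00\x08\x00\x00\x00\x00");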

#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use tokio::io::{AsyncWriteExt, duplex};
    use zerocopy::IntoBytes;

    use crate::pqproto::{FeStartupPacket, read_message, read_startup};

    use super::ProtocolVersion;

    #[tokio::test]
    async fn reject_large_startup() {
        // we're going to define a v3.0 startup message with far too many parameters.
        let mut payload = vec![];
        // 10001 + 8 bytes.
        payload.extend_from_slice(&10009_u32.to_be_bytes());
        payload.extend_from_slice(ProtocolVersion::new(3, 0).as_bytes());
        payload.resize(10009, b'a');

        let (mut server, mut client) = duplex(128);
        #[rustfmt::skip]
        let (server, client) = tokio::join!(
            async move { read_startup(&mut server).await.unwrap_err() },
            async move { client.write_all(&payload).await.unwrap_err() },
        );

        assert_eq!(server.to_string(), "invalid startup message length 10001");
        assert_eq!(client.to_string(), "broken pipe");
    }

    #[tokio::test]
    async fn reject_large_password() {
        // we're going to define a password message that is far too long.
        let mut payload = vec![];
        payload.push(b'p');
        payload.extend_from_slice(&517_u32.to_be_bytes());
        payload.resize(518, b'a');

        let (mut server, mut client) = duplex(128);
        #[rustfmt::skip]
        let (server, client) = tokio::join!(
            async move { read_message(&mut server, &mut vec![], 512).await.unwrap_err() },
            async move { client.write_all(&payload).await.unwrap_err() },
        );

        assert_eq!(server.to_string(), "invalid message length 513");
        assert_eq!(client.to_string(), "broken pipe");
    }

    #[tokio::test]
    async fn read_startup_message() {
        let mut payload = vec![];
        payload.extend_from_slice(&17_u32.to_be_bytes());
        payload.extend_from_slice(ProtocolVersion::new(3, 0).as_bytes());
        payload.extend_from_slice(b"abc\0def\0\0");

        let startup = read_startup(&mut Cursor::new(&payload)).await.unwrap();
        let FeStartupPacket::StartupMessage { version, params } = startup else {
            panic!("unexpected startup message: {startup:?}");
        };

        assert_eq!(version.major(), 3);
        assert_eq!(version.minor(), 0);
        assert_eq!(params.params, "abc\0def\0");
    }

    #[tokio::test]
    async fn read_ssl_message() {
        let mut payload = vec![];
        payload.extend_from_slice(&8_u32.to_be_bytes());
        payload.extend_from_slice(ProtocolVersion::new(1234, 5679).as_bytes());

        let startup = read_startup(&mut Cursor::new(&payload)).await.unwrap();
        let FeStartupPacket::SslRequest { direct: None } = startup else {
            panic!("unexpected startup message: {startup:?}");
        };
    }

    #[tokio::test]
    async fn read_tls_message() {
        // sample client hello taken from <https://tls13.xargs.org/#client-hello>
        let client_hello = [
            0x16, 0x03, 0x01, 0x00, 0xf8, 0x01, 0x00, 0x00, 0xf4, 0x03, 0x03, 0x00, 0x01, 0x02,
            0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
            0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e,
            0x1f, 0x20, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb,
            0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9,
            0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0x00, 0x08, 0x13, 0x02, 0x13, 0x03, 0x13, 0x01,
            0x00, 0xff, 0x01, 0x00, 0x00, 0xa3, 0x00, 0x00, 0x00, 0x18, 0x00, 0x16, 0x00, 0x00,
            0x13, 0x65, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x2e, 0x75, 0x6c, 0x66, 0x68, 0x65,
            0x69, 0x6d, 0x2e, 0x6e, 0x65, 0x74, 0x00, 0x0b, 0x00, 0x04, 0x03, 0x00, 0x01, 0x02,
            0x00, 0x0a, 0x00, 0x16, 0x00, 0x14, 0x00, 0x1d, 0x00, 0x17, 0x00, 0x1e, 0x00, 0x19,
            0x00, 0x18, 0x01, 0x00, 0x01, 0x01, 0x01, 0x02, 0x01, 0x03, 0x01, 0x04, 0x00, 0x23,
            0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x1e,
            0x00, 0x1c, 0x04, 0x03, 0x05, 0x03, 0x06, 0x03, 0x08, 0x07, 0x08, 0x08, 0x08, 0x09,
            0x08, 0x0a, 0x08, 0x0b, 0x08, 0x04, 0x08, 0x05, 0x08, 0x06, 0x04, 0x01, 0x05, 0x01,
            0x06, 0x01, 0x00, 0x2b, 0x00, 0x03, 0x02, 0x03, 0x04, 0x00, 0x2d, 0x00, 0x02, 0x01,
            0x01, 0x00, 0x33, 0x00, 0x26, 0x00, 0x24, 0x00, 0x1d, 0x00, 0x20, 0x35, 0x80, 0x72,
            0xd6, 0x36, 0x58, 0x80, 0xd1, 0xae, 0xea, 0x32, 0x9a, 0xdf, 0x91, 0x21, 0x38, 0x38,
            0x51, 0xed, 0x21, 0xa2, 0x8e, 0x3b, 0x75, 0xe9, 0x65, 0xd0, 0xd2, 0xcd, 0x16, 0x62,
            0x54,
        ];

        let mut cursor = Cursor::new(&client_hello);

        let startup = read_startup(&mut cursor).await.unwrap();
        let FeStartupPacket::SslRequest {
            direct: Some(prefix),
        } = startup
        else {
            panic!("unexpected startup message: {startup:?}");
        };

        // check that no data is lost.
        assert_eq!(prefix, [0x16, 0x03, 0x01, 0x00, 0xf8, 0x01, 0x00, 0x00]);
        assert_eq!(cursor.position(), 8);
    }

    #[tokio::test]
    async fn read_message_success() {
        let query = b"Q\0\0\0\x0cSELECT 1Q\0\0\0\x0cSELECT 2";
        let mut cursor = Cursor::new(&query);

        let mut buf = vec![];
        let (tag, message) = read_message(&mut cursor, &mut buf, 100).await.unwrap();
        assert_eq!(tag, b'Q');
        assert_eq!(message, b"SELECT 1");

        let (tag, message) = read_message(&mut cursor, &mut buf, 100).await.unwrap();
        assert_eq!(tag, b'Q');
        assert_eq!(message, b"SELECT 2");
    }
}

@@ -1,4 +1,5 @@
use async_trait::async_trait;
use pq_proto::StartupMessageParams;
use tokio::time;
use tracing::{debug, info, warn};

@@ -14,7 +15,6 @@ use crate::error::ReportableError;
use crate::metrics::{
    ConnectOutcome, ConnectionFailureKind, Metrics, RetriesMetricGroup, RetryType,
};
use crate::pqproto::StartupMessageParams;
use crate::proxy::retry::{CouldRetry, retry_after, should_retry};
use crate::proxy::wake_compute::wake_compute;
use crate::types::Host;

@@ -53,6 +53,25 @@ pub(crate) trait ConnectMechanism {
    fn update_connect_config(&self, conf: &mut compute::ConnCfg);
}

#[async_trait]
impl<T: ConnectMechanism + Sync> ConnectMechanism for &T {
    type Connection = T::Connection;
    type ConnectError = T::ConnectError;
    type Error = T::Error;
    async fn connect_once(
        &self,
        ctx: &RequestContext,
        node_info: &control_plane::CachedNodeInfo,
        config: &ComputeConfig,
    ) -> Result<Self::Connection, Self::ConnectError> {
        T::connect_once(self, ctx, node_info, config).await
    }

    fn update_connect_config(&self, conf: &mut compute::ConnCfg) {
        T::update_connect_config(self, conf);
    }
}
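
The blanket impl just above is the standard trick for keeping by-reference callers compiling once an API moves to owned generics (see the connect_to_compute signature change in the next hunk). A self-contained sketch of the pattern, with hypothetical names:

    trait Mechanism {
        fn connect(&self) -> &'static str;
    }

    // Forward the trait through shared references, as the diff does for ConnectMechanism.
    impl<T: Mechanism> Mechanism for &T {
        fn connect(&self) -> &'static str {
            T::connect(self)
        }
    }

    struct Tcp;
    impl Mechanism for Tcp {
        fn connect(&self) -> &'static str {
            "tcp"
        }
    }

    fn run(m: impl Mechanism) -> &'static str {
        m.connect()
    }

    fn main() {
        let tcp = Tcp;
        assert_eq!(run(&tcp), "tcp"); // by-ref caller still compiles
        assert_eq!(run(tcp), "tcp"); // and so does pass-by-value
    }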

#[async_trait]
pub(crate) trait ComputeConnectBackend {
    async fn wake_compute(

@@ -105,8 +124,8 @@ impl ConnectMechanism for TcpMechanism<'_> {
#[tracing::instrument(skip_all)]
pub(crate) async fn connect_to_compute<M: ConnectMechanism, B: ComputeConnectBackend>(
    ctx: &RequestContext,
    mechanism: &M,
    user_info: &B,
    mechanism: M,
    backend: B,
    wake_compute_retry_config: RetryConfig,
    compute: &ComputeConfig,
) -> Result<M::Connection, M::Error>

@@ -116,9 +135,9 @@ where
{
    let mut num_retries = 0;
    let mut node_info =
        wake_compute(&mut num_retries, ctx, user_info, wake_compute_retry_config).await?;
        wake_compute(&mut num_retries, ctx, &backend, wake_compute_retry_config).await?;

    node_info.set_keys(user_info.get_keys());
    node_info.set_keys(backend.get_keys());
    mechanism.update_connect_config(&mut node_info.config);

    // try once

@@ -159,7 +178,7 @@ where
    let old_node_info = invalidate_cache(node_info);
    // TODO: increment num_retries?
    let mut node_info =
        wake_compute(&mut num_retries, ctx, user_info, wake_compute_retry_config).await?;
        wake_compute(&mut num_retries, ctx, &backend, wake_compute_retry_config).await?;
    node_info.reuse_settings(old_node_info);

    mechanism.update_connect_config(&mut node_info.config);

@@ -67,7 +67,6 @@ where
    }
}

#[tracing::instrument(skip_all)]
pub async fn copy_bidirectional_client_compute<Client, Compute>(
    client: &mut Client,
    compute: &mut Compute,

@@ -1,6 +1,11 @@
use futures::{FutureExt, TryFutureExt};
use bytes::Buf;
use pq_proto::framed::Framed;
use pq_proto::{
    BeMessage as Be, CancelKeyData, FeStartupPacket, ProtocolVersion, StartupMessageParams,
};
use thiserror::Error;
use tokio::io::{AsyncRead, AsyncWrite};
use tokio_util::task::task_tracker::TaskTrackerToken;
use tracing::{debug, info, warn};

use crate::auth::endpoint_sni;

@@ -8,10 +13,7 @@ use crate::config::TlsConfig;
use crate::context::RequestContext;
use crate::error::ReportableError;
use crate::metrics::Metrics;
use crate::pqproto::{
    BeMessage, CancelKeyData, FeStartupPacket, ProtocolVersion, StartupMessageParams,
};
use crate::proxy::TlsRequired;
use crate::proxy::ERR_INSECURE_CONNECTION;
use crate::stream::{PqStream, Stream, StreamUpgradeError};
use crate::tls::PG_ALPN_PROTOCOL;

@@ -50,7 +52,7 @@ impl ReportableError for HandshakeError {

pub(crate) enum HandshakeData<S> {
    Startup(PqStream<Stream<S>>, StartupMessageParams),
    Cancel(CancelKeyData),
    Cancel(CancelKeyData, TaskTrackerToken),
}

/// Establish a (most probably, secure) connection with the client.

@@ -58,9 +60,10 @@ pub(crate) enum HandshakeData<S> {
/// It's easier to work with owned `stream` here as we need to upgrade it to TLS;
/// we also take extra care to propagate only select handshake errors to the client.
#[tracing::instrument(skip_all)]
pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin + Send>(
pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
    ctx: &RequestContext,
    stream: S,
    tracker: TaskTrackerToken,
    mut tls: Option<&TlsConfig>,
    record_handshake_error: bool,
) -> Result<HandshakeData<S>, HandshakeError> {

@@ -70,25 +73,33 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin + Send>(
    const PG_PROTOCOL_EARLIEST: ProtocolVersion = ProtocolVersion::new(3, 0);
    const PG_PROTOCOL_LATEST: ProtocolVersion = ProtocolVersion::new(3, 0);

    let (mut stream, mut msg) = PqStream::parse_startup(Stream::from_raw(stream)).await?;
    let mut stream = PqStream::new(Stream::from_raw(stream), tracker);
    loop {
        let msg = stream.read_startup_packet().await?;
        match msg {
            FeStartupPacket::SslRequest { direct } => match stream.get_ref() {
                Stream::Raw { .. } if !tried_ssl => {
                    tried_ssl = true;

                    // We can't perform TLS handshake without a config
                    let have_tls = tls.is_some();
                    if !direct {
                        stream
                            .write_message(&Be::EncryptionResponse(have_tls))
                            .await?;
                    } else if !have_tls {
                        return Err(HandshakeError::ProtocolViolation);
                    }

                    if let Some(tls) = tls.take() {
                        // Upgrade raw stream into a secure TLS-backed stream.
                        // NOTE: We've consumed `tls`; this fact will be used later.

                        let mut read_buf;
                        let raw = if let Some(direct) = &direct {
                            read_buf = &direct[..];
                            stream.accept_direct_tls()
                        } else {
                            read_buf = &[];
                            stream.accept_tls().await?
                        };
                        let Framed {
                            stream: raw,
                            read_buf,
                            write_buf,
                        } = stream.framed;

                        let Stream::Raw { raw } = raw else {
                            return Err(HandshakeError::StreamUpgradeError(

@@ -96,11 +107,12 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin + Send>(
                            ));
                        };

                        let mut read_buf = read_buf.reader();
                        let mut res = Ok(());
                        let accept = tokio_rustls::TlsAcceptor::from(tls.pg_config.clone())
                            .accept_with(raw, |session| {
                                // push the early data to the tls session
                                while !read_buf.is_empty() {
                                while !read_buf.get_ref().is_empty() {
                                    match session.read_tls(&mut read_buf) {
                                        Ok(_) => {}
                                        Err(e) => {

@@ -109,12 +121,11 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin + Send>(
                                    }
                                }
                            })
                            .map_ok(Box::new)
                            .boxed();
                            });

                        res?;

                        let read_buf = read_buf.into_inner();
                        if !read_buf.is_empty() {
                            return Err(HandshakeError::EarlyData);
                        }

@@ -148,17 +159,14 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin + Send>(
                        let (_, tls_server_end_point) =
                            tls.cert_resolver.resolve(conn_info.server_name());

                        let tls = Stream::Tls {
                            tls: tls_stream,
                            tls_server_end_point,
                        stream.framed = Framed {
                            stream: Stream::Tls {
                                tls: Box::new(tls_stream),
                                tls_server_end_point,
                            },
                            read_buf,
                            write_buf,
                        };
                        (stream, msg) = PqStream::parse_startup(tls).await?;
                    } else {
                        if direct.is_some() {
                            // client sent us a ClientHello already, we can't do anything with it.
                            return Err(HandshakeError::ProtocolViolation);
                        }
                        msg = stream.reject_encryption().await?;
                    }
                }
                _ => return Err(HandshakeError::ProtocolViolation),

@@ -168,7 +176,7 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin + Send>(
                    tried_gss = true;

                    // Currently, we don't support GSSAPI
                    msg = stream.reject_encryption().await?;
                    stream.write_message(&Be::EncryptionResponse(false)).await?;
                }
                _ => return Err(HandshakeError::ProtocolViolation),
            },

@@ -178,7 +186,13 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin + Send>(
                // Check that the config has been consumed during upgrade
                // OR we didn't provide it at all (for dev purposes).
                if tls.is_some() {
                    Err(stream.throw_error(TlsRequired, None).await)?;
                    return stream
                        .throw_error_str(
                            ERR_INSECURE_CONNECTION,
                            crate::error::ErrorKind::User,
                            None,
                        )
                        .await?;
                }

                // This log highlights the start of the connection.

@@ -200,21 +214,20 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin + Send>(
                // no protocol extensions are supported.
                // <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/backend/tcop/backend_startup.c#L744-L753>
                let mut unsupported = vec![];
                let mut supported = StartupMessageParams::default();

                for (k, v) in params.iter() {
                for (k, _) in params.iter() {
                    if k.starts_with("_pq_.") {
                        unsupported.push(k);
                    } else {
                        supported.insert(k, v);
                    }
                }

                stream.write_message(BeMessage::NegotiateProtocolVersion {
                    version: PG_PROTOCOL_LATEST,
                    options: &unsupported,
                });
                stream.flush().await?;
                // TODO: remove unsupported options so we don't send them to compute.

                stream
                    .write_message(&Be::NegotiateProtocolVersion {
                        version: PG_PROTOCOL_LATEST,
                        options: &unsupported,
                    })
                    .await?;

                info!(
                    ?version,

@@ -222,7 +235,7 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin + Send>(
                    session_type = "normal",
                    "successful handshake; unsupported minor version requested"
                );
                break Ok(HandshakeData::Startup(stream, supported));
                break Ok(HandshakeData::Startup(stream, params));
            }
            FeStartupPacket::StartupMessage { version, params } => {
                warn!(

@@ -235,7 +248,7 @@ pub(crate) async fn handshake<S: AsyncRead + AsyncWrite + Unpin + Send>(
            }
            FeStartupPacket::CancelRequest(cancel_key_data) => {
                info!(session_type = "cancellation", "successful handshake");
                break Ok(HandshakeData::Cancel(cancel_key_data));
                break Ok(HandshakeData::Cancel(cancel_key_data, stream.tracker));
            }
        }
    }

@@ -10,26 +10,27 @@ pub(crate) mod wake_compute;
use std::sync::Arc;

pub use copy_bidirectional::{ErrorSource, copy_bidirectional_client_compute};
use futures::FutureExt;
use futures::TryFutureExt;
use itertools::Itertools;
use once_cell::sync::OnceCell;
use passthrough::passthrough;
use pq_proto::{BeMessage as Be, CancelKeyData, StartupMessageParams};
use regex::Regex;
use serde::{Deserialize, Serialize};
use smol_str::{SmolStr, ToSmolStr, format_smolstr};
use smol_str::{SmolStr, format_smolstr};
use thiserror::Error;
use tokio::io::{AsyncRead, AsyncWrite};
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
use tokio_util::sync::CancellationToken;
use tokio_util::task::task_tracker::TaskTrackerToken;
use tracing::{Instrument, debug, error, info, warn};

use self::connect_compute::{TcpMechanism, connect_to_compute};
use self::passthrough::ProxyPassthrough;
use crate::cancellation::{self, CancellationHandler};
use crate::config::{ProxyConfig, ProxyProtocolV2, TlsConfig};
use crate::context::RequestContext;
use crate::error::{ReportableError, UserFacingError};
use crate::error::ReportableError;
use crate::metrics::{Metrics, NumClientConnectionsGuard};
use crate::pqproto::{BeMessage, CancelKeyData, StartupMessageParams};
use crate::protocol2::{ConnectHeader, ConnectionInfo, ConnectionInfoExtra, read_proxy_protocol};
use crate::protocol2::{ConnectHeader, ConnectionInfo, read_proxy_protocol};
use crate::proxy::handshake::{HandshakeData, handshake};
use crate::rate_limiter::EndpointRateLimiter;
use crate::stream::{PqStream, Stream};

@@ -38,18 +39,6 @@ use crate::{auth, compute};

const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)";

#[derive(Error, Debug)]
#[error("{ERR_INSECURE_CONNECTION}")]
pub struct TlsRequired;

impl ReportableError for TlsRequired {
    fn get_error_kind(&self) -> crate::error::ErrorKind {
        crate::error::ErrorKind::User
    }
}

impl UserFacingError for TlsRequired {}

pub async fn run_until_cancelled<F: std::future::Future>(
    f: F,
    cancellation_token: &CancellationToken,

@@ -82,7 +71,6 @@ pub async fn task_main(
    socket2::SockRef::from(&listener).set_keepalive(true)?;

    let connections = tokio_util::task::task_tracker::TaskTracker::new();
    let cancellations = tokio_util::task::task_tracker::TaskTracker::new();

    while let Some(accept_result) =
        run_until_cancelled(listener.accept(), &cancellation_token).await
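
Judging by this call site, run_until_cancelled resolves to Some(output) when the wrapped future finishes first and to None on cancellation. A plausible body (editor's sketch only; the real definition sits outside this hunk):

    pub async fn run_until_cancelled<F: std::future::Future>(
        f: F,
        cancellation_token: &CancellationToken,
    ) -> Option<F::Output> {
        tokio::select! {
            res = f => Some(res),
            _ = cancellation_token.cancelled() => None,
        }
    }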

@@ -96,12 +84,12 @@ pub async fn task_main(

        let session_id = uuid::Uuid::new_v4();
        let cancellation_handler = Arc::clone(&cancellation_handler);
        let cancellations = cancellations.clone();

        debug!(protocol = "tcp", %session_id, "accepted new TCP connection");
        let endpoint_rate_limiter2 = endpoint_rate_limiter.clone();

        connections.spawn(async move {
        let tracker = connections.token();
        tokio::spawn(async move {
            let (socket, conn_info) = match read_proxy_protocol(socket).await {
                Err(e) => {
                    warn!("per-client task finished with an error: {e:#}");

@@ -150,60 +138,41 @@ pub async fn task_main(
                crate::metrics::Protocol::Tcp,
                &config.region,
            );
            let span = ctx.span();
            let mut ctx = Some(ctx);

            let res = handle_client(
                config,
                auth_backend,
                &ctx,
                &mut ctx,
                cancellation_handler,
                socket,
                ClientMode::Tcp,
                endpoint_rate_limiter2,
                conn_gauge,
                cancellations,
                tracker,
            )
            .instrument(ctx.span())
            .boxed()
            .instrument(span)
            .await;

            match res {
                Err(e) => {
            match (ctx, res) {
                (None, _) => {}
                (Some(ctx), Ok(())) => {
                    ctx.success();
                }
                (Some(ctx), Err(e)) => {
                    ctx.set_error_kind(e.get_error_kind());
                    warn!(parent: &ctx.span(), "per-client task finished with an error: {e:#}");
                }
                Ok(None) => {
                    ctx.set_success();
                }
                Ok(Some(p)) => {
                    ctx.set_success();
                    let _disconnect = ctx.log_connect();
                    match p.proxy_pass(&config.connect_to_compute).await {
                        Ok(()) => {}
                        Err(ErrorSource::Client(e)) => {
                            warn!(
                                ?session_id,
                                "per-client task finished with an IO error from the client: {e:#}"
                            );
                        }
                        Err(ErrorSource::Compute(e)) => {
                            error!(
                                ?session_id,
                                "per-client task finished with an IO error from the compute: {e:#}"
                            );
                        }
                    }
                }
            }
        });
    }

    connections.close();
    cancellations.close();
    drop(listener);

    // Drain connections
    connections.wait().await;
    cancellations.wait().await;

    Ok(())
}

@@ -270,46 +239,79 @@ impl ReportableError for ClientRequestError {
}

#[allow(clippy::too_many_arguments)]
pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send>(
pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send + 'static>(
    config: &'static ProxyConfig,
    auth_backend: &'static auth::Backend<'static, ()>,
    ctx: &RequestContext,
    ctx_slot: &mut Option<RequestContext>,
    cancellation_handler: Arc<CancellationHandler>,
    stream: S,
    mode: ClientMode,
    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
    conn_gauge: NumClientConnectionsGuard<'static>,
    cancellations: tokio_util::task::task_tracker::TaskTracker,
) -> Result<Option<ProxyPassthrough<S>>, ClientRequestError> {
    debug!(
        protocol = %ctx.protocol(),
        "handling interactive connection from client"
    );
    tracker: TaskTrackerToken,
) -> Result<(), ClientRequestError> {
    let cplane = match auth_backend {
        auth::Backend::ControlPlane(cplane, ()) => &**cplane,
        auth::Backend::Local(_) => unreachable!("local proxy does not run tcp proxy service"),
    };

    let protocol = ctx_slot.as_ref().expect("context must be set").protocol();
    debug!(%protocol, "handling interactive connection from client");

    let metrics = &Metrics::get().proxy;
    let proto = ctx.protocol();
    let request_gauge = metrics.connection_requests.guard(proto);
    let request_gauge = metrics.connection_requests.guard(protocol);

    let tls = config.tls_config.load();
    let tls = tls.as_deref();
    let handshake_result: Result<_, ClientRequestError> = async {
        let tls = config.tls_config.load();
        let tls = tls.as_deref();

        let record_handshake_error = !ctx.has_private_peer_addr();
        let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
        let do_handshake = handshake(ctx, stream, mode.handshake_tls(tls), record_handshake_error);
        let ctx = ctx_slot.as_ref().expect("context must be set");
        let record_handshake_error = !ctx.has_private_peer_addr();
        let data = {
            let _pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
            tokio::time::timeout(
                config.handshake_timeout,
                handshake(
                    ctx,
                    stream,
                    tracker,
                    mode.handshake_tls(tls),
                    record_handshake_error,
                ),
            )
            .await??
        };

        let (mut stream, params) = match tokio::time::timeout(config.handshake_timeout, do_handshake)
            .await??
        {
            HandshakeData::Startup(stream, params) => (stream, params),
            HandshakeData::Cancel(cancel_key_data) => {
                // spawn a task to cancel the session, but don't wait for it
                cancellations.spawn({
                    let cancellation_handler_clone = Arc::clone(&cancellation_handler);
                    let ctx = ctx.clone();
                    let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, "cancel_session", session_id = ?ctx.session_id());
        match data {
            HandshakeData::Startup(mut stream, params) => {
                ctx.set_db_options(params.clone());

                let host = mode.hostname(stream.get_ref());
                let cn = tls.map(|tls| &tls.common_names);

                // Extract credentials which we're going to use for auth.
                let result = auth::ComputeUserInfoMaybeEndpoint::parse(ctx, &params, host, cn);
                let user_info = match result {
                    Ok(user_info) => user_info,
                    Err(e) => stream.throw_error(e, Some(ctx)).await?,
                };

                let session = cancellation_handler.get_key();
                Ok(Some((stream, params, session, user_info)))
            }
            HandshakeData::Cancel(cancel_key_data, tracker) => {
                let ctx = ctx_slot.take().expect("context must be set");
                ctx.set_success();

                let cancel_span = tracing::info_span!(parent: None, "cancel_session", session_id = ?ctx.session_id());
                cancel_span.follows_from(tracing::Span::current());
                    async move {
                        cancellation_handler_clone

                // spawn a task to cancel the session, but don't wait for it
                tokio::spawn(async move {
                    // ensure the proxy doesn't shut down until we complete this task.
                    let _tracker = tracker;

                    cancellation_handler
                        .cancel_session(
                            cancel_key_data,
                            ctx,

@@ -317,133 +319,136 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send>(
                            config.authentication_config.is_vpc_acccess_proxy,
                            auth_backend.get_api(),
                        )
                        .instrument(cancel_span)
                        .await
                        .inspect_err(|e| debug!(error = ?e, "cancel_session failed")).ok();
                    }.instrument(cancel_span)
                });
                        .unwrap_or_else(|e| debug!(error = ?e, "cancel_session failed"));
                });

                return Ok(None);
                Ok(None)
            }
        }
    };
    drop(pause);

    ctx.set_db_options(params.clone());

    let hostname = mode.hostname(stream.get_ref());

    let common_names = tls.map(|tls| &tls.common_names);

    // Extract credentials which we're going to use for auth.
    let result = auth_backend
        .as_ref()
        .map(|()| auth::ComputeUserInfoMaybeEndpoint::parse(ctx, &params, hostname, common_names))
        .transpose();

    let user_info = match result {
        Ok(user_info) => user_info,
        Err(e) => Err(stream.throw_error(e, Some(ctx)).await)?,
    };

    let user = user_info.get_user().to_owned();
    let user_info = match user_info
        .authenticate(
            ctx,
            &mut stream,
            mode.allow_cleartext(),
            &config.authentication_config,
            endpoint_rate_limiter,
        )
        .await
    {
        Ok(auth_result) => auth_result,
        Err(e) => {
            let db = params.get("database");
            let app = params.get("application_name");
            let params_span = tracing::info_span!("", ?user, ?db, ?app);

            return Err(stream
                .throw_error(e, Some(ctx))
                .instrument(params_span)
                .await)?;
        }
    };

    let compute_user_info = match &user_info {
        auth::Backend::ControlPlane(_, info) => &info.info,
        auth::Backend::Local(_) => unreachable!("local proxy does not run tcp proxy service"),
    };
    let params_compat = compute_user_info
        .options
        .get(NeonOptions::PARAMS_COMPAT)
        .is_some();

    let res = connect_to_compute(
        ctx,
        &TcpMechanism {
            user_info: compute_user_info.clone(),
            params_compat,
            params: &params,
            locks: &config.connect_compute_locks,
        },
        &user_info,
        config.wake_compute_retry_config,
        &config.connect_to_compute,
    )
    }
    .await;

    let node = match res {
        Ok(node) => node,
        Err(e) => Err(stream.throw_error(e, Some(ctx)).await)?,
    let Some((mut stream, params, session, user_info)) = handshake_result? else {
        return Ok(());
    };
    let ctx = ctx_slot.as_ref().expect("context must be set");

    let cancellation_handler_clone = Arc::clone(&cancellation_handler);
    let session = cancellation_handler_clone.get_key();
    let auth_result: Result<_, ClientRequestError> = async {
        let user = user_info.user.clone();

        session.write_cancel_key(node.cancel_closure.clone())?;
        prepare_client_connection(&node, *session.key(), &mut stream);
        let stream = stream.flush_and_into_inner().await?;
        match cplane
            .authenticate(
                ctx,
                &mut stream,
                user_info,
                mode.allow_cleartext(),
                &config.authentication_config,
                endpoint_rate_limiter,
            )
            .await
        {
            Ok(auth_result) => Ok(auth_result),
            Err(e) => {
                let db = params.get("database");
                let app = params.get("application_name");
                let params_span = tracing::info_span!("", ?user, ?db, ?app);
                stream
                    .throw_error(e, Some(ctx))
                    .instrument(params_span)
                    .await?
            }
        }
    }
    .await;

    let private_link_id = match ctx.extra() {
        Some(ConnectionInfoExtra::Aws { vpce_id }) => Some(vpce_id.clone()),
        Some(ConnectionInfoExtra::Azure { link_id }) => Some(link_id.to_smolstr()),
        None => None,
    };
    let compute_creds = auth_result?;

    Ok(Some(ProxyPassthrough {
        client: stream,
        aux: node.aux.clone(),
        private_link_id,
        compute: node,
        session_id: ctx.session_id(),
        cancel: session,
        _req: request_gauge,
        _conn: conn_gauge,
    }))
    let connect_result: Result<_, ClientRequestError> = async {
        let compute_user_info = compute_creds.info.clone();
        let params_compat = compute_user_info
            .options
            .get(NeonOptions::PARAMS_COMPAT)
            .is_some();

        let mut node = connect_to_compute(
            ctx,
            TcpMechanism {
                user_info: compute_user_info,
                params_compat,
                params: &params,
                locks: &config.connect_compute_locks,
            },
            auth::ControlPlaneWakeCompute {
                cplane,
                creds: compute_creds,
            },
            config.wake_compute_retry_config,
            &config.connect_to_compute,
        )
        .or_else(|e| stream.throw_error(e, Some(ctx)))
        .await?;

        session.write_cancel_key(node.cancel_closure.clone())?;
        prepare_client_connection(&node, *session.key(), &mut stream).await?;

        // Before proxy passing, forward to compute whatever data is left in the
        // PqStream input buffer. Normally there is none, but our serverless npm
        // driver in pipeline mode sends startup, password and first query
        // immediately after opening the connection.
        let (stream, read_buf, tracker) = stream.into_inner();
        node.stream.write_all(&read_buf).await?;

        Ok((node, stream, tracker))
    }
    .await;

    let (node, stream, tracker) = connect_result?;

    let ctx = ctx_slot.take().expect("context must be set");
    ctx.set_success();

    tokio::spawn(passthrough(
        ctx,
        &config.connect_to_compute,
        stream,
        node,
        session,
        request_gauge,
        conn_gauge,
        tracker,
    ));

    Ok(())
}
|
||||
|
||||
/// Finish client connection initialization: confirm auth success, send params, etc.
|
||||
pub(crate) fn prepare_client_connection(
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub(crate) async fn prepare_client_connection(
|
||||
node: &compute::PostgresConnection,
|
||||
cancel_key_data: CancelKeyData,
|
||||
stream: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
|
||||
) {
|
||||
) -> Result<(), std::io::Error> {
|
||||
// Forward all deferred notices to the client.
|
||||
for notice in &node.delayed_notice {
|
||||
stream.write_raw(notice.as_bytes().len(), b'N', |buf| {
|
||||
buf.extend_from_slice(notice.as_bytes());
|
||||
});
|
||||
stream.write_message_noflush(&Be::Raw(b'N', notice.as_bytes()))?;
|
||||
}
|
||||
|
||||
// Forward all postgres connection params to the client.
|
||||
for (name, value) in &node.params {
|
||||
stream.write_message(BeMessage::ParameterStatus {
|
||||
stream.write_message_noflush(&Be::ParameterStatus {
|
||||
name: name.as_bytes(),
|
||||
value: value.as_bytes(),
|
||||
});
|
||||
})?;
|
||||
}
|
||||
|
||||
stream.write_message(BeMessage::BackendKeyData(cancel_key_data));
|
||||
stream.write_message(BeMessage::ReadyForQuery);
|
||||
stream
|
||||
.write_message_noflush(&Be::BackendKeyData(cancel_key_data))?
|
||||
.write_message(&Be::ReadyForQuery)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use futures::FutureExt;
|
||||
use smol_str::SmolStr;
|
||||
use smol_str::{SmolStr, ToSmolStr};
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tokio_util::task::task_tracker::TaskTrackerToken;
|
||||
use tracing::debug;
|
||||
use utils::measured_stream::MeasuredStream;
|
||||
|
||||
@@ -8,13 +8,14 @@ use super::copy_bidirectional::ErrorSource;
|
||||
use crate::cancellation;
|
||||
use crate::compute::PostgresConnection;
|
||||
use crate::config::ComputeConfig;
|
||||
use crate::context::RequestContext;
|
||||
use crate::control_plane::messages::MetricsAuxInfo;
|
||||
use crate::metrics::{Direction, Metrics, NumClientConnectionsGuard, NumConnectionRequestsGuard};
|
||||
use crate::protocol2::ConnectionInfoExtra;
|
||||
use crate::stream::Stream;
|
||||
use crate::usage_metrics::{Ids, MetricCounterRecorder, USAGE_METRICS};
|
||||
|
||||
/// Forward bytes in both directions (client <-> compute).
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub(crate) async fn proxy_pass(
|
||||
client: impl AsyncRead + AsyncWrite + Unpin,
|
||||
compute: impl AsyncRead + AsyncWrite + Unpin,
|
||||
@@ -62,42 +63,53 @@ pub(crate) async fn proxy_pass(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) struct ProxyPassthrough<S> {
|
||||
pub(crate) client: Stream<S>,
|
||||
pub(crate) compute: PostgresConnection,
|
||||
pub(crate) aux: MetricsAuxInfo,
|
||||
pub(crate) session_id: uuid::Uuid,
|
||||
pub(crate) private_link_id: Option<SmolStr>,
|
||||
pub(crate) cancel: cancellation::Session,
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) async fn passthrough<S: AsyncRead + AsyncWrite + Unpin + Send + 'static>(
|
||||
ctx: RequestContext,
|
||||
compute_config: &'static ComputeConfig,
|
||||
|
||||
pub(crate) _req: NumConnectionRequestsGuard<'static>,
|
||||
pub(crate) _conn: NumClientConnectionsGuard<'static>,
|
||||
}
|
||||
client: Stream<S>,
|
||||
compute: PostgresConnection,
|
||||
cancel: cancellation::Session,
|
||||
|
||||
impl<S: AsyncRead + AsyncWrite + Unpin> ProxyPassthrough<S> {
|
||||
pub(crate) async fn proxy_pass(
|
||||
self,
|
||||
compute_config: &ComputeConfig,
|
||||
) -> Result<(), ErrorSource> {
|
||||
let res = proxy_pass(
|
||||
self.client,
|
||||
self.compute.stream,
|
||||
self.aux,
|
||||
self.private_link_id,
|
||||
)
|
||||
.await;
|
||||
if let Err(err) = self
|
||||
.compute
|
||||
.cancel_closure
|
||||
.try_cancel_query(compute_config)
|
||||
.boxed()
|
||||
.await
|
||||
{
|
||||
tracing::warn!(session_id = ?self.session_id, ?err, "could not cancel the query in the database");
|
||||
_req: NumConnectionRequestsGuard<'static>,
|
||||
_conn: NumClientConnectionsGuard<'static>,
|
||||
_tracker: TaskTrackerToken,
|
||||
) {
|
||||
let session_id = ctx.session_id();
|
||||
let private_link_id = match ctx.extra() {
|
||||
Some(ConnectionInfoExtra::Aws { vpce_id }) => Some(vpce_id.clone()),
|
||||
Some(ConnectionInfoExtra::Azure { link_id }) => Some(link_id.to_smolstr()),
|
||||
None => None,
|
||||
};
|
||||
|
||||
let _disconnect = ctx.log_connect();
|
||||
let res = proxy_pass(client, compute.stream, compute.aux, private_link_id).await;
|
||||
|
||||
match res {
|
||||
Ok(()) => {}
|
||||
Err(ErrorSource::Client(e)) => {
|
||||
tracing::warn!(
|
||||
session_id = ?session_id,
|
||||
"per-client task finished with an IO error from the client: {e:#}"
|
||||
);
|
||||
}
|
||||
Err(ErrorSource::Compute(e)) => {
|
||||
tracing::error!(
|
||||
session_id = ?session_id,
|
||||
"per-client task finished with an IO error from the compute: {e:#}"
|
||||
);
|
||||
}
|
||||
|
||||
drop(self.cancel.remove_cancel_key()); // we don't need a result. If the queue is full, we just log the error
|
||||
|
||||
res
|
||||
}
|
||||
|
||||
if let Err(err) = compute
|
||||
.cancel_closure
|
||||
.try_cancel_query(compute_config)
|
||||
.await
|
||||
{
|
||||
tracing::warn!(session_id = ?session_id, ?err, "could not cancel the query in the database");
|
||||
}
|
||||
|
||||
// we don't need a result. If the queue is full, we just log the error
|
||||
drop(cancel.remove_cancel_key());
|
||||
}
|
||||
|
||||
@@ -125,9 +125,8 @@ pub(crate) fn retry_after(num_retries: u32, config: RetryConfig) -> time::Durati
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use postgres_client::error::{DbError, SqlState};
|
||||
|
||||
use super::ShouldRetryWakeCompute;
|
||||
use postgres_client::error::{DbError, SqlState};
|
||||
|
||||
#[test]
|
||||
fn should_retry_wake_compute_for_db_error() {
|
||||
|
||||
@@ -10,7 +10,7 @@ use bytes::{Bytes, BytesMut};
use futures::{SinkExt, StreamExt};
use postgres_client::tls::TlsConnect;
use postgres_protocol::message::frontend;
use tokio::io::{AsyncReadExt, AsyncWriteExt, DuplexStream};
use tokio::io::{AsyncReadExt, DuplexStream};
use tokio_util::codec::{Decoder, Encoder};

use super::*;
@@ -38,6 +38,7 @@ async fn proxy_mitm(
let (end_client, startup) = match handshake(
&RequestContext::test(),
client1,
TaskTracker::new().token(),
Some(&server_config1),
false,
)
@@ -45,18 +46,19 @@ async fn proxy_mitm(
.unwrap()
{
HandshakeData::Startup(stream, params) => (stream, params),
HandshakeData::Cancel(_) => panic!("cancellation not supported"),
HandshakeData::Cancel(_, _) => panic!("cancellation not supported"),
};

let mut end_server = tokio_util::codec::Framed::new(end_server, PgFrame);
let end_client = end_client.flush_and_into_inner().await.unwrap();
let (end_client, buf) = end_client.framed.into_inner();
assert!(buf.is_empty());
let mut end_client = tokio_util::codec::Framed::new(end_client, PgFrame);

// give the end_server the startup parameters
let mut buf = BytesMut::new();
frontend::startup_message(
&postgres_protocol::message::frontend::StartupMessageParams {
params: startup.params.as_bytes().into(),
params: startup.params.into(),
},
&mut buf,
)

@@ -15,6 +15,7 @@ use rstest::rstest;
use rustls::crypto::ring;
use rustls::pki_types;
use tokio::io::DuplexStream;
use tokio_util::task::TaskTracker;
use tracing_test::traced_test;

use super::connect_compute::ConnectMechanism;
@@ -26,7 +27,9 @@ use crate::auth::backend::{
use crate::config::{ComputeConfig, RetryConfig};
use crate::control_plane::client::{ControlPlaneClient, TestControlPlaneClient};
use crate::control_plane::messages::{ControlPlaneErrorMessage, Details, MetricsAuxInfo, Status};
use crate::control_plane::{self, CachedNodeInfo, NodeInfo, NodeInfoCache};
use crate::control_plane::{
self, CachedAllowedIps, CachedAllowedVpcEndpointIds, CachedNodeInfo, NodeInfo, NodeInfoCache,
};
use crate::error::ErrorKind;
use crate::tls::client_config::compute_client_config_with_certs;
use crate::tls::postgres_rustls::MakeRustlsConnect;
@@ -126,7 +129,7 @@ trait TestAuth: Sized {
self,
stream: &mut PqStream<Stream<S>>,
) -> anyhow::Result<()> {
stream.write_message(BeMessage::AuthenticationOk);
stream.write_message_noflush(&Be::AuthenticationOk)?;
Ok(())
}
}
@@ -155,7 +158,9 @@ impl TestAuth for Scram {
self,
stream: &mut PqStream<Stream<S>>,
) -> anyhow::Result<()> {
let outcome = auth::AuthFlow::new(stream, auth::Scram(&self.0, &RequestContext::test()))
let outcome = auth::AuthFlow::new(stream)
.begin(auth::Scram(&self.0, &RequestContext::test()))
.await?
.authenticate()
.await?;

@@ -174,19 +179,19 @@ async fn dummy_proxy(
auth: impl TestAuth + Send,
) -> anyhow::Result<()> {
let (client, _) = read_proxy_protocol(client).await?;
let mut stream = match handshake(&RequestContext::test(), client, tls.as_ref(), false).await? {
HandshakeData::Startup(stream, _) => stream,
HandshakeData::Cancel(_) => bail!("cancellation not supported"),
};
let t = TaskTracker::new().token();
let mut stream =
match handshake(&RequestContext::test(), client, t, tls.as_ref(), false).await? {
HandshakeData::Startup(stream, _) => stream,
HandshakeData::Cancel(_, _) => bail!("cancellation not supported"),
};

auth.authenticate(&mut stream).await?;

stream.write_message(BeMessage::ParameterStatus {
name: b"client_encoding",
value: b"UTF8",
});
stream.write_message(BeMessage::ReadyForQuery);
stream.flush().await?;
stream
.write_message_noflush(&Be::CLIENT_ENCODING)?
.write_message(&Be::ReadyForQuery)
.await?;

Ok(())
}
@@ -545,9 +550,20 @@ impl TestControlPlaneClient for TestConnectMechanism {
}
}

fn get_access_control(
fn get_allowed_ips(&self) -> Result<CachedAllowedIps, control_plane::errors::GetAuthInfoError> {
unimplemented!("not used in tests")
}

fn get_allowed_vpc_endpoint_ids(
&self,
) -> Result<control_plane::EndpointAccessControl, control_plane::errors::GetAuthInfoError> {
) -> Result<CachedAllowedVpcEndpointIds, control_plane::errors::GetAuthInfoError> {
unimplemented!("not used in tests")
}

fn get_block_public_or_vpc_access(
&self,
) -> Result<control_plane::CachedAccessBlockerFlags, control_plane::errors::GetAuthInfoError>
{
unimplemented!("not used in tests")
}

@@ -609,7 +625,7 @@ async fn connect_to_compute_success() {
let mechanism = TestConnectMechanism::new(vec![Wake, Connect]);
let user_info = helper_create_connect_info(&mechanism);
let config = config();
connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config)
connect_to_compute(&ctx, &mechanism, user_info, config.retry, &config)
.await
.unwrap();
mechanism.verify();
@@ -623,7 +639,7 @@ async fn connect_to_compute_retry() {
let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Connect]);
let user_info = helper_create_connect_info(&mechanism);
let config = config();
connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config)
connect_to_compute(&ctx, &mechanism, user_info, config.retry, &config)
.await
.unwrap();
mechanism.verify();
@@ -638,7 +654,7 @@ async fn connect_to_compute_non_retry_1() {
let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Fail]);
let user_info = helper_create_connect_info(&mechanism);
let config = config();
connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config)
connect_to_compute(&ctx, &mechanism, user_info, config.retry, &config)
.await
.unwrap_err();
mechanism.verify();
@@ -653,7 +669,7 @@ async fn connect_to_compute_non_retry_2() {
let mechanism = TestConnectMechanism::new(vec![Wake, Fail, Wake, Connect]);
let user_info = helper_create_connect_info(&mechanism);
let config = config();
connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config)
connect_to_compute(&ctx, &mechanism, user_info, config.retry, &config)
.await
.unwrap();
mechanism.verify();
@@ -678,7 +694,7 @@ async fn connect_to_compute_non_retry_3() {
connect_to_compute(
&ctx,
&mechanism,
&user_info,
user_info,
wake_compute_retry_config,
&config,
)
@@ -696,7 +712,7 @@ async fn wake_retry() {
let mechanism = TestConnectMechanism::new(vec![WakeRetry, Wake, Connect]);
let user_info = helper_create_connect_info(&mechanism);
let config = config();
connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config)
connect_to_compute(&ctx, &mechanism, user_info, config.retry, &config)
.await
.unwrap();
mechanism.verify();
@@ -711,7 +727,7 @@ async fn wake_non_retry() {
let mechanism = TestConnectMechanism::new(vec![WakeRetry, WakeFail]);
let user_info = helper_create_connect_info(&mechanism);
let config = config();
connect_to_compute(&ctx, &mechanism, &user_info, config.retry, &config)
connect_to_compute(&ctx, &mechanism, user_info, config.retry, &config)
.await
.unwrap_err();
mechanism.verify();
@@ -730,7 +746,7 @@ async fn fail_but_wake_invalidates_cache() {
let user = helper_create_connect_info(&mech);
let cfg = config();

connect_to_compute(&ctx, &mech, &user, cfg.retry, &cfg)
connect_to_compute(&ctx, &mech, user, cfg.retry, &cfg)
.await
.unwrap();

@@ -751,7 +767,7 @@ async fn fail_no_wake_skips_cache_invalidation() {
let user = helper_create_connect_info(&mech);
let cfg = config();

connect_to_compute(&ctx, &mech, &user, cfg.retry, &cfg)
connect_to_compute(&ctx, &mech, user, cfg.retry, &cfg)
.await
.unwrap();

@@ -772,7 +788,7 @@ async fn retry_but_wake_invalidates_cache() {
let user_info = helper_create_connect_info(&mechanism);
let cfg = config();

connect_to_compute(&ctx, &mechanism, &user_info, cfg.retry, &cfg)
connect_to_compute(&ctx, &mechanism, user_info, cfg.retry, &cfg)
.await
.unwrap();
mechanism.verify();
@@ -795,7 +811,7 @@ async fn retry_no_wake_skips_invalidation() {
let user_info = helper_create_connect_info(&mechanism);
let cfg = config();

connect_to_compute(&ctx, &mechanism, &user_info, cfg.retry, &cfg)
connect_to_compute(&ctx, &mechanism, user_info, cfg.retry, &cfg)
.await
.unwrap_err();
mechanism.verify();
@@ -15,7 +15,7 @@ pub type EndpointRateLimiter = LeakyBucketRateLimiter<EndpointIdInt>;

pub struct LeakyBucketRateLimiter<Key> {
map: ClashMap<Key, LeakyBucketState, RandomState>,
default_config: utils::leaky_bucket::LeakyBucketConfig,
config: utils::leaky_bucket::LeakyBucketConfig,
access_count: AtomicUsize,
}

@@ -28,17 +28,15 @@ impl<K: Hash + Eq> LeakyBucketRateLimiter<K> {
pub fn new_with_shards(config: LeakyBucketConfig, shards: usize) -> Self {
Self {
map: ClashMap::with_hasher_and_shard_amount(RandomState::new(), shards),
default_config: config.into(),
config: config.into(),
access_count: AtomicUsize::new(0),
}
}

/// Check that number of connections to the endpoint is below `max_rps` rps.
pub(crate) fn check(&self, key: K, config: Option<LeakyBucketConfig>, n: u32) -> bool {
pub(crate) fn check(&self, key: K, n: u32) -> bool {
let now = Instant::now();

let config = config.map_or(self.default_config, Into::into);

if self.access_count.fetch_add(1, Ordering::AcqRel) % 2048 == 0 {
self.do_gc(now);
}
@@ -48,7 +46,7 @@ impl<K: Hash + Eq> LeakyBucketRateLimiter<K> {
.entry(key)
.or_insert_with(|| LeakyBucketState { empty_at: now });

entry.add_tokens(&config, now, n as f64).is_ok()
entry.add_tokens(&self.config, now, n as f64).is_ok()
}

fn do_gc(&self, now: Instant) {
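To illustrate the `empty_at` bookkeeping behind `add_tokens` above, here is a self-contained sketch; the `LeakyBucketConfig` and `LeakyBucketState` shapes are simplified stand-ins for the ones in `utils::leaky_bucket`:

use std::time::{Duration, Instant};

// Assumed shape: `rps` is the drain rate, `max` the bucket capacity in tokens.
struct LeakyBucketConfig { rps: f64, max: f64 }

struct LeakyBucketState { empty_at: Instant }

impl LeakyBucketState {
    // Adding n tokens pushes `empty_at` forward by n/rps seconds; the request
    // is rejected if the bucket would then hold more than `max` tokens.
    fn add_tokens(&mut self, config: &LeakyBucketConfig, now: Instant, n: f64) -> Result<(), ()> {
        let base = if self.empty_at > now { self.empty_at } else { now };
        let new_empty_at = base + Duration::from_secs_f64(n / config.rps);
        let fill = new_empty_at.saturating_duration_since(now).as_secs_f64() * config.rps;
        if fill > config.max {
            return Err(());
        }
        self.empty_at = new_empty_at;
        Ok(())
    }
}

fn main() {
    let config = LeakyBucketConfig { rps: 10.0, max: 20.0 };
    let now = Instant::now();
    let mut state = LeakyBucketState { empty_at: now };
    // 20 tokens fit; the 21st at the same instant is rejected.
    assert!(state.add_tokens(&config, now, 20.0).is_ok());
    assert!(state.add_tokens(&config, now, 1.0).is_err());
}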
@@ -15,8 +15,6 @@ use tracing::info;
use crate::ext::LockExt;
use crate::intern::EndpointIdInt;

use super::LeakyBucketConfig;

pub struct GlobalRateLimiter {
data: Vec<RateBucket>,
info: Vec<RateBucketInfo>,
@@ -146,6 +144,19 @@ impl RateBucketInfo {
Self::new(50_000, Duration::from_secs(10)),
];

/// All of these are per endpoint-maskedip pair.
/// Context: 4096 rounds of pbkdf2 take about 1ms of cpu time to execute (1 milli-cpu-second or 1mcpus).
///
/// First bucket: 1000mcpus total per endpoint-ip pair
/// * 4096000 requests per second with 1 hash round.
/// * 1000 requests per second with 4096 hash rounds.
/// * 6.8 requests per second with 600000 hash rounds.
pub const DEFAULT_AUTH_SET: [Self; 3] = [
Self::new(1000 * 4096, Duration::from_secs(1)),
Self::new(600 * 4096, Duration::from_secs(60)),
Self::new(300 * 4096, Duration::from_secs(600)),
];

pub fn rps(&self) -> f64 {
(self.max_rpi as f64) / self.interval.as_secs_f64()
}
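The doc comment's numbers check out; a quick back-of-envelope verification of the first bucket's budget:

fn main() {
    // 4096 pbkdf2 rounds ≈ 1 milli-cpu-second, so the first bucket grants
    // 1000 mcpus per second, i.e. 1000 * 4096 rounds per second.
    let budget_per_sec = (1000 * 4096) as f64;
    assert_eq!(budget_per_sec / 1.0, 4_096_000.0); // 1-round requests per second
    assert_eq!(budget_per_sec / 4096.0, 1000.0);   // 4096-round requests per second
    let heavy = budget_per_sec / 600_000.0;        // 600000-round requests per second
    assert!((heavy - 6.8).abs() < 0.05);           // ≈ 6.8 rps, as documented
}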
@@ -173,21 +184,6 @@ impl RateBucketInfo {
max_rpi: ((max_rps as u64) * (interval.as_millis() as u64) / 1000) as u32,
}
}

pub fn to_leaky_bucket(this: &[Self]) -> Option<LeakyBucketConfig> {
// bit of a hack - find the min rps and max rps supported and turn it into
// leaky bucket config instead

let mut iter = this.iter().map(|info| info.rps());
let first = iter.next()?;

let (min, max) = (first, first);
let (min, max) = iter.fold((min, max), |(min, max), rps| {
(f64::min(min, rps), f64::max(max, rps))
});

Some(LeakyBucketConfig { rps: min, max })
}
}

impl<K: Hash + Eq> BucketRateLimiter<K> {
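The min/max fold in `to_leaky_bucket` reduces a set of bucket rates to a steady drain rate (the minimum) and a burst capacity (the maximum). The same fold on bare numbers:

fn main() {
    let rps = [5000.0_f64, 2000.0, 200.0];
    let mut iter = rps.iter().copied();
    let first = iter.next().unwrap();
    let (min, max) = iter.fold((first, first), |(min, max), r| {
        (f64::min(min, r), f64::max(max, r))
    });
    // min becomes the refill rate, max the burst allowance.
    assert_eq!((min, max), (200.0, 5000.0));
}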
@@ -8,4 +8,4 @@ pub(crate) use limit_algorithm::aimd::Aimd;
pub(crate) use limit_algorithm::{
DynamicLimiter, Outcome, RateLimitAlgorithm, RateLimiterConfig, Token,
};
pub use limiter::{GlobalRateLimiter, RateBucketInfo, WakeComputeRateLimiter};
pub use limiter::{BucketRateLimiter, GlobalRateLimiter, RateBucketInfo, WakeComputeRateLimiter};

@@ -1,11 +1,10 @@
use core::net::IpAddr;
use std::sync::Arc;

use pq_proto::CancelKeyData;
use tokio::sync::Mutex;
use uuid::Uuid;

use crate::pqproto::CancelKeyData;

pub trait CancellationPublisherMut: Send + Sync + 'static {
#[allow(async_fn_in_trait)]
async fn try_publish(

@@ -1,15 +1,16 @@
use std::io::ErrorKind;

use anyhow::Ok;

use crate::pqproto::{CancelKeyData, id_to_cancel_key};
use pq_proto::{CancelKeyData, id_to_cancel_key};
use serde::{Deserialize, Serialize};

pub mod keyspace {
pub const CANCEL_PREFIX: &str = "cancel";
}

#[derive(Clone, Debug, Eq, PartialEq)]
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub(crate) enum KeyPrefix {
#[serde(untagged)]
Cancel(CancelKeyData),
}

@@ -17,7 +18,9 @@ impl KeyPrefix {
pub(crate) fn build_redis_key(&self) -> String {
match self {
KeyPrefix::Cancel(key) => {
let id = key.0.get();
let hi = (key.backend_pid as u64) << 32;
let lo = (key.cancel_key as u64) & 0xffff_ffff;
let id = hi | lo;
let keyspace = keyspace::CANCEL_PREFIX;
format!("{keyspace}:{id:x}")
}
@@ -60,7 +63,10 @@ mod tests {

#[test]
fn test_build_redis_key() {
let cancel_key: KeyPrefix = KeyPrefix::Cancel(id_to_cancel_key(12345 << 32 | 54321));
let cancel_key: KeyPrefix = KeyPrefix::Cancel(CancelKeyData {
backend_pid: 12345,
cancel_key: 54321,
});

let redis_key = cancel_key.build_redis_key();
assert_eq!(redis_key, "cancel:30390000d431");
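The expected string in this test follows directly from the bit layout in `build_redis_key`: 12345 is 0x3039 and 54321 is 0xd431. A standalone check of the packing (treating both halves as 32-bit values, as the diff does):

fn main() {
    let backend_pid: u64 = 12345; // 0x3039
    let cancel_key: u64 = 54321;  // 0xd431
    // PID in the high 32 bits, key in the low 32 bits.
    let id = (backend_pid << 32) | (cancel_key & 0xffff_ffff);
    assert_eq!(id, 0x3039_0000_d431);
    assert_eq!(format!("cancel:{id:x}"), "cancel:30390000d431");
}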
@@ -71,7 +77,10 @@ mod tests {
let redis_key = "cancel:30390000d431";
let key: KeyPrefix = parse_redis_key(redis_key).expect("Failed to parse key");

let ref_key = id_to_cancel_key(12345 << 32 | 54321);
let ref_key = CancelKeyData {
backend_pid: 12345,
cancel_key: 54321,
};

assert_eq!(key.as_str(), KeyPrefix::Cancel(ref_key).as_str());
let KeyPrefix::Cancel(cancel_key) = key;

@@ -2,9 +2,11 @@ use std::convert::Infallible;
use std::sync::Arc;

use futures::StreamExt;
use pq_proto::CancelKeyData;
use redis::aio::PubSub;
use serde::{Deserialize, Serialize};
use tokio_util::sync::CancellationToken;
use uuid::Uuid;

use super::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
use crate::cache::project_info::ProjectInfoCache;
@@ -98,6 +100,14 @@ pub(crate) struct PasswordUpdate {
role_name: RoleNameInt,
}

#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub(crate) struct CancelSession {
pub(crate) region_id: Option<String>,
pub(crate) cancel_key_data: CancelKeyData,
pub(crate) session_id: Uuid,
pub(crate) peer_addr: Option<std::net::IpAddr>,
}

fn deserialize_json_string<'de, D, T>(deserializer: D) -> Result<T, D::Error>
where
T: for<'de2> serde::Deserialize<'de2>,
@@ -233,30 +243,29 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {

fn invalidate_cache<C: ProjectInfoCache>(cache: Arc<C>, msg: Notification) {
match msg {
Notification::AllowedIpsUpdate {
allowed_ips_update: AllowedIpsUpdate { project_id },
Notification::AllowedIpsUpdate { allowed_ips_update } => {
cache.invalidate_allowed_ips_for_project(allowed_ips_update.project_id);
}
| Notification::BlockPublicOrVpcAccessUpdated {
block_public_or_vpc_access_updated: BlockPublicOrVpcAccessUpdated { project_id },
} => cache.invalidate_endpoint_access_for_project(project_id),
Notification::BlockPublicOrVpcAccessUpdated {
block_public_or_vpc_access_updated,
} => cache.invalidate_block_public_or_vpc_access_for_project(
block_public_or_vpc_access_updated.project_id,
),
Notification::AllowedVpcEndpointsUpdatedForOrg {
allowed_vpc_endpoints_updated_for_org: AllowedVpcEndpointsUpdatedForOrg { account_id },
} => cache.invalidate_endpoint_access_for_org(account_id),
allowed_vpc_endpoints_updated_for_org,
} => cache.invalidate_allowed_vpc_endpoint_ids_for_org(
allowed_vpc_endpoints_updated_for_org.account_id,
),
Notification::AllowedVpcEndpointsUpdatedForProjects {
allowed_vpc_endpoints_updated_for_projects:
AllowedVpcEndpointsUpdatedForProjects { project_ids },
} => {
for project in project_ids {
cache.invalidate_endpoint_access_for_project(project);
}
}
Notification::PasswordUpdate {
password_update:
PasswordUpdate {
project_id,
role_name,
},
} => cache.invalidate_role_secret_for_project(project_id, role_name),
allowed_vpc_endpoints_updated_for_projects,
} => cache.invalidate_allowed_vpc_endpoint_ids_for_projects(
allowed_vpc_endpoints_updated_for_projects.project_ids,
),
Notification::PasswordUpdate { password_update } => cache
.invalidate_role_secret_for_project(
password_update.project_id,
password_update.role_name,
),
Notification::UnknownTopic => unreachable!(),
}
}

@@ -1,5 +1,7 @@
//! Definitions for SASL messages.

use pq_proto::{BeAuthenticationSaslMessage, BeMessage};

use crate::parse::split_cstr;

/// SASL-specific payload of [`PasswordMessage`](pq_proto::FeMessage::PasswordMessage).
@@ -28,6 +30,26 @@ impl<'a> FirstMessage<'a> {
}
}

/// A single SASL message.
/// This struct is deliberately decoupled from lower-level
/// [`BeAuthenticationSaslMessage`].
#[derive(Debug)]
pub(super) enum ServerMessage<T> {
/// We expect to see more steps.
Continue(T),
/// This is the final step.
Final(T),
}

impl<'a> ServerMessage<&'a str> {
pub(super) fn to_reply(&self) -> BeMessage<'a> {
BeMessage::AuthenticationSasl(match self {
ServerMessage::Continue(s) => BeAuthenticationSaslMessage::Continue(s.as_bytes()),
ServerMessage::Final(s) => BeAuthenticationSaslMessage::Final(s.as_bytes()),
})
}
}

#[cfg(test)]
mod tests {
use super::*;

@@ -14,7 +14,7 @@ use std::io;

pub(crate) use channel_binding::ChannelBinding;
pub(crate) use messages::FirstMessage;
pub(crate) use stream::{Outcome, authenticate};
pub(crate) use stream::{Outcome, SaslStream};
use thiserror::Error;

use crate::error::{ReportableError, UserFacingError};
@@ -22,9 +22,6 @@ use crate::error::{ReportableError, UserFacingError};
/// Fine-grained auth errors help in writing tests.
#[derive(Error, Debug)]
pub(crate) enum Error {
#[error("Unsupported authentication method: {0}")]
BadAuthMethod(Box<str>),

#[error("Channel binding failed: {0}")]
ChannelBindingFailed(&'static str),

@@ -57,7 +54,6 @@ impl UserFacingError for Error {
impl ReportableError for Error {
fn get_error_kind(&self) -> crate::error::ErrorKind {
match self {
Error::BadAuthMethod(_) => crate::error::ErrorKind::User,
Error::ChannelBindingFailed(_) => crate::error::ErrorKind::User,
Error::ChannelBindingBadMethod(_) => crate::error::ErrorKind::User,
Error::BadClientMessage(_) => crate::error::ErrorKind::User,

@@ -3,12 +3,61 @@
use std::io;

use tokio::io::{AsyncRead, AsyncWrite};
use tracing::info;

use super::{Mechanism, Step};
use crate::context::RequestContext;
use crate::pqproto::{BeAuthenticationSaslMessage, BeMessage};
use super::Mechanism;
use super::messages::ServerMessage;
use crate::stream::PqStream;

/// Abstracts away all peculiarities of the libpq's protocol.
pub(crate) struct SaslStream<'a, S> {
/// The underlying stream.
stream: &'a mut PqStream<S>,
/// Current password message we received from client.
current: bytes::Bytes,
/// First SASL message produced by client.
first: Option<&'a str>,
}

impl<'a, S> SaslStream<'a, S> {
pub(crate) fn new(stream: &'a mut PqStream<S>, first: &'a str) -> Self {
Self {
stream,
current: bytes::Bytes::new(),
first: Some(first),
}
}
}

impl<S: AsyncRead + Unpin> SaslStream<'_, S> {
// Receive a new SASL message from the client.
async fn recv(&mut self) -> io::Result<&str> {
if let Some(first) = self.first.take() {
return Ok(first);
}

self.current = self.stream.read_password_message().await?;
let s = std::str::from_utf8(&self.current)
.map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "bad encoding"))?;

Ok(s)
}
}

impl<S: AsyncWrite + Unpin> SaslStream<'_, S> {
// Send a SASL message to the client.
async fn send(&mut self, msg: &ServerMessage<&str>) -> io::Result<()> {
self.stream.write_message(&msg.to_reply()).await?;
Ok(())
}

// Queue a SASL message for the client.
fn send_noflush(&mut self, msg: &ServerMessage<&str>) -> io::Result<()> {
self.stream.write_message_noflush(&msg.to_reply())?;
Ok(())
}
}

/// SASL authentication outcome.
/// It's much easier to match on those two variants
/// than to peek into a noisy protocol error type.
@@ -20,63 +69,33 @@ pub(crate) enum Outcome<R> {
Failure(&'static str),
}

pub async fn authenticate<S, F, M>(
ctx: &RequestContext,
stream: &mut PqStream<S>,
mechanism: F,
) -> super::Result<Outcome<M::Output>>
where
S: AsyncRead + AsyncWrite + Unpin,
F: FnOnce(&str) -> super::Result<M>,
M: Mechanism,
{
let (mut mechanism, mut input) = {
// pause the timer while we communicate with the client
let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
impl<S: AsyncRead + AsyncWrite + Unpin> SaslStream<'_, S> {
/// Perform SASL message exchange according to the underlying algorithm
/// until user is either authenticated or denied access.
pub(crate) async fn authenticate<M: Mechanism>(
mut self,
mut mechanism: M,
) -> super::Result<Outcome<M::Output>> {
loop {
let input = self.recv().await?;
let step = mechanism.exchange(input).map_err(|error| {
info!(?error, "error during SASL exchange");
error
})?;

// Initial client message contains the chosen auth method's name.
let msg = stream.read_password_message().await?;

let sasl = super::FirstMessage::parse(msg)
.ok_or(super::Error::BadClientMessage("bad sasl message"))?;

(mechanism(sasl.method)?, sasl.message)
};

loop {
match mechanism.exchange(input) {
Ok(Step::Continue(moved_mechanism, reply)) => {
mechanism = moved_mechanism;

// write reply
let sasl_msg = BeAuthenticationSaslMessage::Continue(reply.as_bytes());
stream.write_message(BeMessage::AuthenticationSasl(sasl_msg));
drop(reply);
}
Ok(Step::Success(result, reply)) => {
// write reply
let sasl_msg = BeAuthenticationSaslMessage::Final(reply.as_bytes());
stream.write_message(BeMessage::AuthenticationSasl(sasl_msg));
stream.write_message(BeMessage::AuthenticationOk);

// exit with success
break Ok(Outcome::Success(result));
}
// exit with failure
Ok(Step::Failure(reason)) => break Ok(Outcome::Failure(reason)),
Err(error) => {
tracing::info!(?error, "error during SASL exchange");
return Err(error);
}
use super::Step;
return Ok(match step {
Step::Continue(moved_mechanism, reply) => {
self.send(&ServerMessage::Continue(&reply)).await?;
mechanism = moved_mechanism;
continue;
}
Step::Success(result, reply) => {
self.send_noflush(&ServerMessage::Final(&reply))?;
Outcome::Success(result)
}
Step::Failure(reason) => Outcome::Failure(reason),
});
}

// pause the timer while we communicate with the client
let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client);

// get next input
stream.flush().await?;
let msg = stream.read_password_message().await?;
input = std::str::from_utf8(msg)
.map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "bad encoding"))?;
}
}
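The `Step`-driven loop above is easiest to see with a toy mechanism. The trait and enum below are simplified stand-ins for the proxy's real `Mechanism` and `Step`, just to show how Continue/Success/Failure drive the exchange:

enum Step<M, O> {
    Continue(M, String),
    Success(O, String),
    Failure(&'static str),
}

trait Mechanism: Sized {
    type Output;
    fn exchange(self, input: &str) -> Step<Self, Self::Output>;
}

// A one-round mechanism that accepts a single fixed password.
struct OneShot;

impl Mechanism for OneShot {
    type Output = ();
    fn exchange(self, input: &str) -> Step<Self, ()> {
        if input == "secret" {
            Step::Success((), "ok".to_string())
        } else {
            Step::Failure("bad password")
        }
    }
}

fn authenticate<M: Mechanism>(mut mechanism: M, inputs: &[&str]) -> Result<M::Output, &'static str> {
    for &input in inputs {
        match mechanism.exchange(input) {
            // In the real code the reply is written to the client here.
            Step::Continue(next, _reply) => mechanism = next,
            Step::Success(out, _reply) => return Ok(out),
            Step::Failure(reason) => return Err(reason),
        }
    }
    Err("client stopped responding")
}

fn main() {
    assert!(authenticate(OneShot, &["secret"]).is_ok());
    assert!(authenticate(OneShot, &["wrong"]).is_err());
}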
@@ -22,7 +22,7 @@ use super::http_conn_pool::{self, HttpConnPool, Send, poll_http2_client};
use super::local_conn_pool::{self, EXT_NAME, EXT_SCHEMA, EXT_VERSION, LocalConnPool};
use crate::auth::backend::local::StaticAuthRules;
use crate::auth::backend::{ComputeCredentials, ComputeUserInfo};
use crate::auth::{self, AuthError};
use crate::auth::{self, AuthError, check_peer_addr_is_in_list};
use crate::compute;
use crate::compute_ctl::{
ComputeCtlError, ExtensionInstallRequest, Privilege, SetRoleGrantsRequest,
@@ -35,6 +35,7 @@ use crate::control_plane::errors::{GetAuthInfoError, WakeComputeError};
use crate::control_plane::locks::ApiLocks;
use crate::error::{ErrorKind, ReportableError, UserFacingError};
use crate::intern::EndpointIdInt;
use crate::protocol2::ConnectionInfoExtra;
use crate::proxy::connect_compute::ConnectMechanism;
use crate::proxy::retry::{CouldRetry, ShouldRetryWakeCompute};
use crate::rate_limiter::EndpointRateLimiter;
@@ -62,24 +63,63 @@ impl PoolingBackend {

let user_info = user_info.clone();
let backend = self.auth_backend.as_ref().map(|()| user_info.clone());
let access_control = backend.get_endpoint_access_control(ctx).await?;
access_control.check(
ctx,
self.config.authentication_config.ip_allowlist_check_enabled,
self.config.authentication_config.is_vpc_acccess_proxy,
)?;
let allowed_ips = backend.get_allowed_ips(ctx).await?;

let ep = EndpointIdInt::from(&user_info.endpoint);
let rate_limit_config = None;
if !self.endpoint_rate_limiter.check(ep, rate_limit_config, 1) {
if self.config.authentication_config.ip_allowlist_check_enabled
&& !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips)
{
return Err(AuthError::ip_address_not_allowed(ctx.peer_addr()));
}

let access_blocker_flags = backend.get_block_public_or_vpc_access(ctx).await?;
if self.config.authentication_config.is_vpc_acccess_proxy {
if access_blocker_flags.vpc_access_blocked {
return Err(AuthError::NetworkNotAllowed);
}

let extra = ctx.extra();
let incoming_endpoint_id = match extra {
None => String::new(),
Some(ConnectionInfoExtra::Aws { vpce_id }) => vpce_id.to_string(),
Some(ConnectionInfoExtra::Azure { link_id }) => link_id.to_string(),
};

if incoming_endpoint_id.is_empty() {
return Err(AuthError::MissingVPCEndpointId);
}

let allowed_vpc_endpoint_ids = backend.get_allowed_vpc_endpoint_ids(ctx).await?;
// TODO: For now an empty VPC endpoint ID list means all are allowed. We should replace that.
if !allowed_vpc_endpoint_ids.is_empty()
&& !allowed_vpc_endpoint_ids.contains(&incoming_endpoint_id)
{
return Err(AuthError::vpc_endpoint_id_not_allowed(incoming_endpoint_id));
}
} else if access_blocker_flags.public_access_blocked {
return Err(AuthError::NetworkNotAllowed);
}

if !self
.endpoint_rate_limiter
.check(user_info.endpoint.clone().into(), 1)
{
return Err(AuthError::too_many_connections());
}
let role_access = backend.get_role_secret(ctx).await?;
let Some(secret) = role_access.secret else {
// If we don't have an authentication secret, for the http flow we can just return an error.
info!("authentication info not found");
return Err(AuthError::password_failed(&*user_info.user));
let cached_secret = backend.get_role_secret(ctx).await?;
let secret = match cached_secret.value.clone() {
Some(secret) => self.config.authentication_config.check_rate_limit(
ctx,
secret,
&user_info.endpoint,
true,
)?,
None => {
// If we don't have an authentication secret, for the http flow we can just return an error.
info!("authentication info not found");
return Err(AuthError::password_failed(&*user_info.user));
}
};
let ep = EndpointIdInt::from(&user_info.endpoint);
let auth_outcome = crate::auth::validate_password_and_exchange(
&self.config.authentication_config.thread_pool,
ep,
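One detail worth calling out from the block above: an empty allowlist currently means every VPC endpoint is accepted (the TODO notes this should change). The rule in isolation:

fn vpc_endpoint_allowed(allowed: &[String], incoming: &str) -> bool {
    // Empty list = allow all, per the TODO above.
    allowed.is_empty() || allowed.iter().any(|id| id == incoming)
}

fn main() {
    assert!(vpc_endpoint_allowed(&[], "vpce-123"));
    assert!(vpc_endpoint_allowed(&["vpce-123".to_string()], "vpce-123"));
    assert!(!vpc_endpoint_allowed(&["vpce-456".to_string()], "vpce-123"));
}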
@@ -184,13 +224,13 @@ impl PoolingBackend {
let backend = self.auth_backend.as_ref().map(|()| keys);
crate::proxy::connect_compute::connect_to_compute(
ctx,
&TokioMechanism {
TokioMechanism {
conn_id,
conn_info,
pool: self.pool.clone(),
locks: &self.config.connect_compute_locks,
},
&backend,
backend,
self.config.wake_compute_retry_config,
&self.config.connect_to_compute,
)
@@ -228,13 +268,13 @@ impl PoolingBackend {
});
crate::proxy::connect_compute::connect_to_compute(
ctx,
&HyperMechanism {
HyperMechanism {
conn_id,
conn_info,
pool: self.http_conn_pool.clone(),
locks: &self.config.connect_compute_locks,
},
&backend,
backend,
self.config.wake_compute_retry_config,
&self.config.connect_to_compute,
)

@@ -41,7 +41,7 @@ use tokio::net::{TcpListener, TcpStream};
use tokio::time::timeout;
use tokio_rustls::TlsAcceptor;
use tokio_util::sync::CancellationToken;
use tokio_util::task::TaskTracker;
use tokio_util::task::task_tracker::TaskTrackerToken;
use tracing::{Instrument, info, warn};

use crate::cancellation::CancellationHandler;
@@ -124,7 +124,6 @@ pub async fn task_main(
let connections = tokio_util::task::task_tracker::TaskTracker::new();
connections.close(); // allows `connections.wait` to complete

let cancellations = tokio_util::task::task_tracker::TaskTracker::new();
while let Some(res) = run_until_cancelled(ws_listener.accept(), &cancellation_token).await {
let (conn, peer_addr) = res.context("could not accept TCP stream")?;
if let Err(e) = conn.set_nodelay(true) {
@@ -150,11 +149,11 @@ pub async fn task_main(
let conn_token = cancellation_token.child_token();
let tls_acceptor = tls_acceptor.clone();
let backend = backend.clone();
let connections2 = connections.clone();
let cancellation_handler = cancellation_handler.clone();
let endpoint_rate_limiter = endpoint_rate_limiter.clone();
let cancellations = cancellations.clone();
connections.spawn(

let tracker = connections.token();
tokio::spawn(
async move {
let conn_token2 = conn_token.clone();
let _cancel_guard = config.http_config.cancel_set.insert(conn_id, conn_token2);
@@ -181,8 +180,7 @@ pub async fn task_main(
Box::pin(connection_handler(
config,
backend,
connections2,
cancellations,
tracker,
cancellation_handler,
endpoint_rate_limiter,
conn_token,
@@ -305,8 +303,7 @@ async fn connection_startup(
async fn connection_handler(
config: &'static ProxyConfig,
backend: Arc<PoolingBackend>,
connections: TaskTracker,
cancellations: TaskTracker,
tracker: TaskTrackerToken,
cancellation_handler: Arc<CancellationHandler>,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
cancellation_token: CancellationToken,
@@ -347,19 +344,17 @@ async fn connection_handler(

// `request_handler` is not cancel safe. It expects to be cancelled only at specific times.
// By spawning the future, we ensure it never gets cancelled until it decides to.
let cancellations = cancellations.clone();
let handler = connections.spawn(
let handler = tokio::spawn(
request_handler(
req,
config,
backend.clone(),
connections.clone(),
tracker.clone(),
cancellation_handler.clone(),
session_id,
conn_info2.clone(),
http_request_token,
endpoint_rate_limiter.clone(),
cancellations,
)
.in_current_span()
.map_ok_or_else(api_error_into_response, |r| r),
@@ -400,14 +395,13 @@ async fn request_handler(
mut request: hyper::Request<Incoming>,
config: &'static ProxyConfig,
backend: Arc<PoolingBackend>,
ws_connections: TaskTracker,
tracker: TaskTrackerToken,
cancellation_handler: Arc<CancellationHandler>,
session_id: uuid::Uuid,
conn_info: ConnectionInfo,
// used to cancel in-flight HTTP requests. not used to cancel websockets
http_cancellation_token: CancellationToken,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
cancellations: TaskTracker,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, ApiError> {
let host = request
.headers()
@@ -441,10 +435,17 @@ async fn request_handler(
let (response, websocket) = framed_websockets::upgrade::upgrade(&mut request)
.map_err(|e| ApiError::BadRequest(e.into()))?;

let cancellations = cancellations.clone();
ws_connections.spawn(
tokio::spawn(
async move {
if let Err(e) = websocket::serve_websocket(
let websocket = match websocket.await {
Err(e) => {
warn!("could not upgrade websocket connection: {e:#}");
return;
}
Ok(websocket) => websocket,
};

websocket::serve_websocket(
config,
backend.auth_backend,
ctx,
@@ -452,12 +453,9 @@ async fn request_handler(
cancellation_handler,
endpoint_rate_limiter,
host,
cancellations,
tracker,
)
.await
{
warn!("error in websocket connection: {e:#}");
}
.await;
}
.instrument(span),
);

@@ -17,6 +17,7 @@ use postgres_client::error::{DbError, ErrorPosition, SqlState};
use postgres_client::{
GenericClient, IsolationLevel, NoTls, ReadyForQueryStatus, RowStream, Transaction,
};
use pq_proto::StartupMessageParamsBuilder;
use serde::Serialize;
use serde_json::Value;
use serde_json::value::RawValue;
@@ -40,7 +41,6 @@ use crate::context::RequestContext;
use crate::error::{ErrorKind, ReportableError, UserFacingError};
use crate::http::{ReadBodyError, read_body_with_limit};
use crate::metrics::{HttpDirection, Metrics, SniGroup, SniKind};
use crate::pqproto::StartupMessageParams;
use crate::proxy::{NeonOptions, run_until_cancelled};
use crate::serverless::backend::HttpConnError;
use crate::types::{DbName, RoleName};
@@ -219,7 +219,7 @@ fn get_conn_info(

let mut options = Option::None;

let mut params = StartupMessageParams::default();
let mut params = StartupMessageParamsBuilder::default();
params.insert("user", &username);
params.insert("database", &dbname);
for (key, value) in pairs {
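For readers unfamiliar with `StartupMessageParamsBuilder`: libpq startup parameters are a run of NUL-terminated key/value strings closed by a final NUL. A hypothetical mini-builder with the same insert-style API:

#[derive(Default)]
struct ParamsBuilder {
    buf: Vec<u8>,
}

impl ParamsBuilder {
    fn insert(&mut self, key: &str, value: &str) {
        self.buf.extend_from_slice(key.as_bytes());
        self.buf.push(0);
        self.buf.extend_from_slice(value.as_bytes());
        self.buf.push(0);
    }
    fn finish(mut self) -> Vec<u8> {
        self.buf.push(0); // terminator
        self.buf
    }
}

fn main() {
    let mut params = ParamsBuilder::default();
    params.insert("user", "alice");
    params.insert("database", "neondb");
    assert_eq!(params.finish(), b"user\0alice\0database\0neondb\0\0");
}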
@@ -2,14 +2,14 @@ use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll, ready};

use anyhow::Context as _;
use bytes::{Buf, BufMut, Bytes, BytesMut};
use framed_websockets::{Frame, OpCode, WebSocketServer};
use futures::{Sink, Stream};
use hyper::upgrade::OnUpgrade;
use hyper::upgrade::Upgraded;
use hyper_util::rt::TokioIo;
use pin_project_lite::pin_project;
use tokio::io::{self, AsyncBufRead, AsyncRead, AsyncWrite, ReadBuf};
use tokio_util::task::task_tracker::TaskTrackerToken;
use tracing::warn;

use crate::cancellation::CancellationHandler;
@@ -17,7 +17,7 @@ use crate::config::ProxyConfig;
use crate::context::RequestContext;
use crate::error::ReportableError;
use crate::metrics::Metrics;
use crate::proxy::{ClientMode, ErrorSource, handle_client};
use crate::proxy::{ClientMode, handle_client};
use crate::rate_limiter::EndpointRateLimiter;

pin_project! {
@@ -128,13 +128,12 @@ pub(crate) async fn serve_websocket(
config: &'static ProxyConfig,
auth_backend: &'static crate::auth::Backend<'static, ()>,
ctx: RequestContext,
websocket: OnUpgrade,
websocket: Upgraded,
cancellation_handler: Arc<CancellationHandler>,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
hostname: Option<String>,
cancellations: tokio_util::task::task_tracker::TaskTracker,
) -> anyhow::Result<()> {
let websocket = websocket.await?;
tracker: TaskTrackerToken,
) {
let websocket = WebSocketServer::after_handshake(TokioIo::new(websocket));

let conn_gauge = Metrics::get()
@@ -142,36 +141,28 @@ pub(crate) async fn serve_websocket(
.client_connections
.guard(crate::metrics::Protocol::Ws);

let res = Box::pin(handle_client(
let mut ctx_slot = Some(ctx);
let res = handle_client(
config,
auth_backend,
&ctx,
&mut ctx_slot,
cancellation_handler,
WebSocketRw::new(websocket),
ClientMode::Websockets { hostname },
endpoint_rate_limiter,
conn_gauge,
cancellations,
))
tracker,
)
.await;

match res {
Err(e) => {
match (ctx_slot, res) {
(None, _) => {}
(Some(ctx), Err(e)) => {
ctx.set_error_kind(e.get_error_kind());
Err(e.into())
tracing::warn!(parent: &ctx.span(), "per-client task finished with an error: {e:#}");
}
Ok(None) => {
(Some(ctx), Ok(())) => {
ctx.set_success();
Ok(())
}
Ok(Some(p)) => {
ctx.set_success();
ctx.log_connect();
match p.proxy_pass(&config.connect_to_compute).await {
Ok(()) => Ok(()),
Err(ErrorSource::Client(err)) => Err(err).context("client"),
Err(ErrorSource::Compute(err)) => Err(err).context("compute"),
}
}
}
}
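The new `&mut Option<RequestContext>` slot is the load-bearing change here: the callee may consume the context, and `None` afterwards means the outcome was already reported. Reduced to its essentials (with a `String` standing in for the context):

fn handle_client(ctx_slot: &mut Option<String>) -> Result<(), &'static str> {
    // The happy path hands the context off elsewhere and leaves None behind.
    ctx_slot.take();
    Ok(())
}

fn main() {
    let mut ctx_slot = Some("ctx".to_string());
    let res = handle_client(&mut ctx_slot);
    match (ctx_slot, res) {
        (None, _) => {} // context consumed: nothing left to report here
        (Some(_ctx), Ok(())) => println!("success"),
        (Some(_ctx), Err(e)) => println!("error: {e}"),
    }
}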
@@ -2,17 +2,20 @@ use std::pin::Pin;
use std::sync::Arc;
use std::{io, task};

use bytes::BytesMut;
use pq_proto::framed::{ConnectionError, Framed};
use pq_proto::{BeMessage, FeMessage, FeStartupPacket, ProtocolError};
use rustls::ServerConfig;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, ReadBuf};
use tokio::io::{AsyncRead, AsyncWrite, ReadBuf};
use tokio_rustls::server::TlsStream;
use tokio_util::task::task_tracker::TaskTrackerToken;
use tracing::debug;

use crate::control_plane::messages::ColdStartInfo;
use crate::error::{ErrorKind, ReportableError, UserFacingError};
use crate::metrics::Metrics;
use crate::pqproto::{
BeMessage, FE_PASSWORD_MESSAGE, FeStartupPacket, SQLSTATE_INTERNAL_ERROR, WriteBuf,
read_message, read_startup,
};
use crate::tls::TlsServerEndPoint;

/// Stream wrapper which implements libpq's protocol.
@@ -21,77 +24,61 @@ use crate::tls::TlsServerEndPoint;
/// or [`AsyncWrite`] to prevent subtle errors (e.g. trying
/// to pass random malformed bytes through the connection).
pub struct PqStream<S> {
stream: S,
read: Vec<u8>,
write: WriteBuf,
pub(crate) framed: Framed<S>,
pub(crate) tracker: TaskTrackerToken,
}

impl<S> PqStream<S> {
pub fn get_ref(&self) -> &S {
&self.stream
/// Construct a new libpq protocol wrapper.
pub fn new(stream: S, tracker: TaskTrackerToken) -> Self {
Self {
framed: Framed::new(stream),
tracker,
}
}

/// Construct a new libpq protocol wrapper over a stream without the first startup message.
#[cfg(test)]
pub fn new_skip_handshake(stream: S) -> Self {
Self {
stream,
read: Vec::new(),
write: WriteBuf::new(),
}
/// Extract the underlying stream and read buffer.
pub fn into_inner(self) -> (S, BytesMut, TaskTrackerToken) {
let (stream, read) = self.framed.into_inner();
(stream, read, self.tracker)
}

/// Get a shared reference to the underlying stream.
pub(crate) fn get_ref(&self) -> &S {
self.framed.get_ref()
}
}

impl<S: AsyncRead + AsyncWrite + Unpin> PqStream<S> {
/// Construct a new libpq protocol wrapper and read the first startup message.
///
/// This is not cancel safe.
pub async fn parse_startup(mut stream: S) -> io::Result<(Self, FeStartupPacket)> {
let startup = read_startup(&mut stream).await?;
Ok((
Self {
stream,
read: Vec::new(),
write: WriteBuf::new(),
},
startup,
))
}

/// Tell the client that encryption is not supported.
///
/// This is not cancel safe
pub async fn reject_encryption(&mut self) -> io::Result<FeStartupPacket> {
// N for No.
self.write.encryption(b'N');
self.flush().await?;
read_startup(&mut self.stream).await
}
fn err_connection() -> io::Error {
io::Error::new(io::ErrorKind::ConnectionAborted, "connection is lost")
}

impl<S: AsyncRead + Unpin> PqStream<S> {
/// Read a raw postgres packet, which will respect the max length requested.
/// This is not cancel safe.
async fn read_raw_expect(&mut self, tag: u8, max: u32) -> io::Result<&mut [u8]> {
let (actual_tag, msg) = read_message(&mut self.stream, &mut self.read, max).await?;
if actual_tag != tag {
return Err(io::Error::other(format!(
"incorrect message tag, expected {:?}, got {:?}",
tag as char, actual_tag as char,
)));
}
Ok(msg)
/// Receive [`FeStartupPacket`], which is a first packet sent by a client.
pub async fn read_startup_packet(&mut self) -> io::Result<FeStartupPacket> {
self.framed
.read_startup_message()
.await
.map_err(ConnectionError::into_io_error)?
.ok_or_else(err_connection)
}

/// Read a postgres password message, which will respect the max length requested.
/// This is not cancel safe.
pub async fn read_password_message(&mut self) -> io::Result<&mut [u8]> {
// passwords are usually pretty short
// and SASL SCRAM messages are no longer than 256 bytes in my testing
// (a few hashes and random bytes, encoded into base64).
const MAX_PASSWORD_LENGTH: u32 = 512;
self.read_raw_expect(FE_PASSWORD_MESSAGE, MAX_PASSWORD_LENGTH)
async fn read_message(&mut self) -> io::Result<FeMessage> {
self.framed
.read_message()
.await
.map_err(ConnectionError::into_io_error)?
.ok_or_else(err_connection)
}

pub(crate) async fn read_password_message(&mut self) -> io::Result<bytes::Bytes> {
match self.read_message().await? {
FeMessage::PasswordMessage(msg) => Ok(msg),
bad => Err(io::Error::new(
io::ErrorKind::InvalidData,
format!("unexpected message type: {bad:?}"),
)),
}
}
}
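The tag check in the old `read_raw_expect` reflects libpq framing: one tag byte, then a 4-byte big-endian length that counts itself but not the tag. A minimal synchronous sketch of that contract:

use std::io::{self, Read};

fn read_expect(r: &mut impl Read, expected_tag: u8, max: u32) -> io::Result<Vec<u8>> {
    let mut header = [0u8; 5];
    r.read_exact(&mut header)?;
    let tag = header[0];
    let len = u32::from_be_bytes([header[1], header[2], header[3], header[4]]);
    if tag != expected_tag {
        return Err(io::Error::other(format!(
            "incorrect message tag, expected {:?}, got {:?}",
            expected_tag as char, tag as char,
        )));
    }
    if len < 4 || len > max {
        return Err(io::Error::other("bad message length"));
    }
    let mut body = vec![0u8; (len - 4) as usize];
    r.read_exact(&mut body)?;
    Ok(body)
}

fn main() -> io::Result<()> {
    // 'p' is the password-message tag; the body here is "hunter2\0".
    let mut msg = vec![b'p'];
    msg.extend_from_slice(&(4u32 + 8).to_be_bytes());
    msg.extend_from_slice(b"hunter2\0");
    let body = read_expect(&mut msg.as_slice(), b'p', 512)?;
    assert_eq!(body, b"hunter2\0");
    Ok(())
}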
@@ -101,16 +88,6 @@ pub struct ReportedError {
error_kind: ErrorKind,
}

impl ReportedError {
pub fn new(e: (impl UserFacingError + Into<anyhow::Error>)) -> Self {
let error_kind = e.get_error_kind();
Self {
source: e.into(),
error_kind,
}
}
}

impl std::fmt::Display for ReportedError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.source.fmt(f)
@@ -129,65 +106,109 @@ impl ReportableError for ReportedError {
}
}

#[derive(Serialize, Deserialize, Debug)]
enum ErrorTag {
#[serde(rename = "proxy")]
Proxy,
#[serde(rename = "compute")]
Compute,
#[serde(rename = "client")]
Client,
#[serde(rename = "controlplane")]
ControlPlane,
#[serde(rename = "other")]
Other,
}

impl From<ErrorKind> for ErrorTag {
fn from(error_kind: ErrorKind) -> Self {
match error_kind {
ErrorKind::User => Self::Client,
ErrorKind::ClientDisconnect => Self::Client,
ErrorKind::RateLimit => Self::Proxy,
ErrorKind::ServiceRateLimit => Self::Proxy, // considering rate limit as proxy error for SLI
ErrorKind::Quota => Self::Proxy,
ErrorKind::Service => Self::Proxy,
ErrorKind::ControlPlane => Self::ControlPlane,
ErrorKind::Postgres => Self::Other,
ErrorKind::Compute => Self::Compute,
}
}
}

#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "snake_case")]
struct ProbeErrorData {
tag: ErrorTag,
msg: String,
cold_start_info: Option<ColdStartInfo>,
}

impl<S: AsyncWrite + Unpin> PqStream<S> {
/// Tell the client that we are willing to accept SSL.
/// This is not cancel safe
pub async fn accept_tls(mut self) -> io::Result<S> {
// S for SSL.
self.write.encryption(b'S');
self.flush().await?;
Ok(self.stream)
}

/// Assert that we are using direct TLS.
pub fn accept_direct_tls(self) -> S {
self.stream
}

/// Write a raw message to the internal buffer.
pub fn write_raw(&mut self, size_hint: usize, tag: u8, f: impl FnOnce(&mut Vec<u8>)) {
self.write.write_raw(size_hint, tag, f);
}

/// Write the message into an internal buffer
pub fn write_message(&mut self, message: BeMessage<'_>) {
message.write_message(&mut self.write);
}

/// Flush the output buffer into the underlying stream.
///
/// This is cancel safe.
pub async fn flush(&mut self) -> io::Result<()> {
self.stream.write_all_buf(&mut self.write).await?;
self.write.reset();

self.stream.flush().await?;

Ok(())
}

/// Flush the output buffer into the underlying stream.
///
/// This is cancel safe.
pub async fn flush_and_into_inner(mut self) -> io::Result<S> {
self.flush().await?;
Ok(self.stream)
}

/// Write the error message to the client, then re-throw it.
///
/// Trait [`UserFacingError`] acts as an allowlist for error types.
/// If `ctx` is provided and has testodrome_id set, error messages will be prefixed according to error kind.
pub(crate) async fn throw_error<E>(
/// Write the message into an internal buffer, but don't flush the underlying stream.
pub(crate) fn write_message_noflush(
&mut self,
error: E,
message: &BeMessage<'_>,
) -> io::Result<&mut Self> {
self.framed
.write_message(message)
.map_err(ProtocolError::into_io_error)?;
Ok(self)
}

/// Write the message into an internal buffer and flush it.
pub async fn write_message(&mut self, message: &BeMessage<'_>) -> io::Result<&mut Self> {
self.write_message_noflush(message)?;
self.flush().await?;
Ok(self)
}

/// Flush the output buffer into the underlying stream.
pub(crate) async fn flush(&mut self) -> io::Result<&mut Self> {
self.framed.flush().await?;
Ok(self)
}

/// Writes message with the given error kind to the stream.
/// Used only for probe queries
async fn write_format_message(
&mut self,
msg: &str,
error_kind: ErrorKind,
ctx: Option<&crate::context::RequestContext>,
) -> ReportedError
where
E: UserFacingError + Into<anyhow::Error>,
{
let error_kind = error.get_error_kind();
let msg = error.to_string_client();
) -> String {
let formatted_msg = match ctx {
Some(ctx) if ctx.get_testodrome_id().is_some() => {
serde_json::to_string(&ProbeErrorData {
tag: ErrorTag::from(error_kind),
msg: msg.to_string(),
cold_start_info: Some(ctx.cold_start_info()),
})
.unwrap_or_default()
}
_ => msg.to_string(),
};

// already error case, ignore client IO error
self.write_message(&BeMessage::ErrorResponse(&formatted_msg, None))
.await
.inspect_err(|e| debug!("write_message failed: {e}"))
.ok();

formatted_msg
}

/// Write the error message using [`Self::write_format_message`], then re-throw it.
/// Allowing string literals is safe under the assumption they might not contain any runtime info.
/// This method exists due to `&str` not implementing `Into<anyhow::Error>`.
/// If `ctx` is provided and has testodrome_id set, error messages will be prefixed according to error kind.
pub async fn throw_error_str<T>(
&mut self,
msg: &'static str,
error_kind: ErrorKind,
ctx: Option<&crate::context::RequestContext>,
) -> Result<T, ReportedError> {
self.write_format_message(msg, error_kind, ctx).await;

if error_kind != ErrorKind::RateLimit && error_kind != ErrorKind::User {
tracing::info!(
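What `ProbeErrorData` serializes to, using the same serde renames (a trimmed sketch that omits `cold_start_info`; requires the serde and serde_json crates):

use serde::Serialize;

#[derive(Serialize)]
enum ErrorTag {
    #[serde(rename = "compute")]
    Compute,
}

#[derive(Serialize)]
struct ProbeErrorData {
    tag: ErrorTag,
    msg: String,
}

fn main() {
    let data = ProbeErrorData {
        tag: ErrorTag::Compute,
        msg: "connection refused".to_string(),
    };
    // A unit variant serializes as its renamed string.
    assert_eq!(
        serde_json::to_string(&data).unwrap(),
        r#"{"tag":"compute","msg":"connection refused"}"#
    );
}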
@@ -197,39 +218,39 @@ impl<S: AsyncWrite + Unpin> PqStream<S> {
|
||||
);
|
||||
}
|
||||
|
||||
let probe_msg;
|
||||
let mut msg = &*msg;
|
||||
if let Some(ctx) = ctx {
|
||||
if ctx.get_testodrome_id().is_some() {
|
||||
let tag = match error_kind {
|
||||
ErrorKind::User => "client",
|
||||
ErrorKind::ClientDisconnect => "client",
|
||||
ErrorKind::RateLimit => "proxy",
|
||||
ErrorKind::ServiceRateLimit => "proxy",
|
||||
ErrorKind::Quota => "proxy",
|
||||
ErrorKind::Service => "proxy",
|
||||
ErrorKind::ControlPlane => "controlplane",
|
||||
ErrorKind::Postgres => "other",
|
||||
ErrorKind::Compute => "compute",
|
||||
};
|
||||
probe_msg = typed_json::json!({
|
||||
"tag": tag,
|
||||
"msg": msg,
|
||||
"cold_start_info": ctx.cold_start_info(),
|
||||
})
|
||||
.to_string();
|
||||
msg = &probe_msg;
|
||||
}
|
||||
Err(ReportedError {
|
||||
source: anyhow::anyhow!(msg),
|
||||
error_kind,
|
||||
})
|
||||
}
|
||||
|
||||
/// Write the error message using [`Self::write_format_message`], then re-throw it.
|
||||
/// Trait [`UserFacingError`] acts as an allowlist for error types.
|
||||
/// If `ctx` is provided and has testodrome_id set, error messages will be prefixed according to error kind.
|
||||
pub(crate) async fn throw_error<T, E>(
|
||||
&mut self,
|
||||
error: E,
|
||||
ctx: Option<&crate::context::RequestContext>,
|
||||
) -> Result<T, ReportedError>
|
||||
where
|
||||
E: UserFacingError + Into<anyhow::Error>,
|
||||
{
|
||||
let error_kind = error.get_error_kind();
|
||||
let msg = error.to_string_client();
|
||||
self.write_format_message(&msg, error_kind, ctx).await;
|
||||
if error_kind != ErrorKind::RateLimit && error_kind != ErrorKind::User {
|
||||
tracing::info!(
|
||||
kind=error_kind.to_metric_label(),
|
||||
error=%error,
|
||||
msg,
|
||||
"forwarding error to user",
|
||||
);
|
||||
}
|
||||
|
||||
// TODO: either preserve the error code from postgres, or assign error codes to proxy errors.
|
||||
self.write.write_error(msg, SQLSTATE_INTERNAL_ERROR);
|
||||
|
||||
self.flush()
|
||||
.await
|
||||
.unwrap_or_else(|e| tracing::debug!("write_message failed: {e}"));
|
||||
|
||||
ReportedError::new(error)
|
||||
Err(ReportedError {
|
||||
source: anyhow::anyhow!(error),
|
||||
error_kind,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -31,9 +31,7 @@ mod private {
|
||||
type Output = io::Result<RustlsStream<S>>;
|
||||
|
||||
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
|
||||
Pin::new(&mut self.inner)
|
||||
.poll(cx)
|
||||
.map_ok(|s| RustlsStream(Box::new(s)))
|
||||
Pin::new(&mut self.inner).poll(cx).map_ok(RustlsStream)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -59,7 +57,7 @@ mod private {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct RustlsStream<S>(Box<TlsStream<S>>);
|
||||
pub struct RustlsStream<S>(TlsStream<S>);
|
||||
|
||||
impl<S> postgres_client::tls::TlsStream for RustlsStream<S>
|
||||
where
|
||||
|
||||
@@ -139,14 +139,6 @@ pub(crate) struct StorageControllerMetricGroup {
|
||||
/// HTTP request status counters for handled requests
|
||||
pub(crate) storage_controller_reconcile_long_running:
|
||||
measured::CounterVec<ReconcileLongRunningLabelGroupSet>,
|
||||
|
||||
/// Indicator of safekeeper reconciler queue depth, broken down by safekeeper, excluding ongoing reconciles.
|
||||
pub(crate) storage_controller_safkeeper_reconciles_queued:
|
||||
measured::GaugeVec<SafekeeperReconcilerLabelGroupSet>,
|
||||
|
||||
/// Indicator of completed safekeeper reconciles, broken down by safekeeper.
|
||||
pub(crate) storage_controller_safkeeper_reconciles_complete:
|
||||
measured::CounterVec<SafekeeperReconcilerLabelGroupSet>,
|
||||
}
|
||||
|
||||
impl StorageControllerMetrics {
|
||||
@@ -265,17 +257,6 @@ pub(crate) enum Method {
|
||||
Other,
|
||||
}
|
||||
|
||||
#[derive(measured::LabelGroup, Clone)]
|
||||
#[label(set = SafekeeperReconcilerLabelGroupSet)]
|
||||
pub(crate) struct SafekeeperReconcilerLabelGroup<'a> {
|
||||
#[label(dynamic_with = lasso::ThreadedRodeo, default)]
|
||||
pub(crate) sk_az: &'a str,
|
||||
#[label(dynamic_with = lasso::ThreadedRodeo, default)]
|
||||
pub(crate) sk_node_id: &'a str,
|
||||
#[label(dynamic_with = lasso::ThreadedRodeo, default)]
|
||||
pub(crate) sk_hostname: &'a str,
|
||||
}
|
||||
|
||||
impl From<hyper::Method> for Method {
|
||||
fn from(value: hyper::Method) -> Self {
|
||||
if value == hyper::Method::GET {
|
||||
|
||||
@@ -20,9 +20,7 @@ use utils::{
|
||||
};
|
||||
|
||||
use crate::{
|
||||
metrics::{METRICS_REGISTRY, SafekeeperReconcilerLabelGroup},
|
||||
persistence::SafekeeperTimelineOpKind,
|
||||
safekeeper::Safekeeper,
|
||||
persistence::SafekeeperTimelineOpKind, safekeeper::Safekeeper,
|
||||
safekeeper_client::SafekeeperClient,
|
||||
};
|
||||
|
||||
@@ -220,26 +218,7 @@ impl ReconcilerHandle {
     fn schedule_reconcile(&self, req: ScheduleRequest) {
         let (cancel, token_id) = self.new_token_slot(req.tenant_id, req.timeline_id);
         let hostname = req.safekeeper.skp.host.clone();
-        let sk_az = req.safekeeper.skp.availability_zone_id.clone();
-        let sk_node_id = req.safekeeper.get_id().to_string();
-
-        // We don't have direct access to the queue depth here, so increase it blindly by 1.
-        // We know that putting into the queue increases the queue depth. The receiver will
-        // update with the correct value once it processes the next item. To avoid races where we
-        // reduce before we increase, leaving the gauge with a 1 value for a long time, we
-        // increase it before putting into the queue.
-        let queued_gauge = &METRICS_REGISTRY
-            .metrics_group
-            .storage_controller_safkeeper_reconciles_queued;
-        let label_group = SafekeeperReconcilerLabelGroup {
-            sk_az: &sk_az,
-            sk_node_id: &sk_node_id,
-            sk_hostname: &hostname,
-        };
-        queued_gauge.inc(label_group.clone());
-
         if let Err(err) = self.tx.send((req, cancel, token_id)) {
-            queued_gauge.set(label_group, 0);
             tracing::info!("scheduling request onto {hostname} returned error: {err}");
         }
     }

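The deleted comment in schedule_reconcile encodes an ordering rule worth spelling out: the sender increments the queue-depth gauge before enqueueing, and the receiver later overwrites it with the authoritative depth. A minimal sketch of that ordering (illustrative Python with hypothetical names, not the controller's actual code):

    import queue

    work_queue: "queue.Queue[str]" = queue.Queue()
    queued_gauge = 0  # stands in for the per-safekeeper queue-depth gauge

    def schedule(item: str) -> None:
        global queued_gauge
        # Increment BEFORE enqueueing: if we enqueued first, the receiver could
        # drain the item and set the gauge to the true depth before our increment
        # lands, leaving a stale +1 on the gauge until the next item arrives.
        queued_gauge += 1
        work_queue.put(item)

    def receive() -> str:
        global queued_gauge
        item = work_queue.get()
        # The receiver overwrites the gauge with the authoritative queue depth.
        queued_gauge = work_queue.qsize()
        return item
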
@@ -304,18 +283,6 @@ impl SafekeeperReconciler {
                 continue;
             }

-            let queued_gauge = &METRICS_REGISTRY
-                .metrics_group
-                .storage_controller_safkeeper_reconciles_queued;
-            queued_gauge.set(
-                SafekeeperReconcilerLabelGroup {
-                    sk_az: &req.safekeeper.skp.availability_zone_id,
-                    sk_node_id: &req.safekeeper.get_id().to_string(),
-                    sk_hostname: &req.safekeeper.skp.host,
-                },
-                self.rx.len() as i64,
-            );
-
             tokio::task::spawn(async move {
                 let kind = req.kind;
                 let tenant_id = req.tenant_id;

@@ -544,16 +511,6 @@ impl SafekeeperReconcilerInner {
                 req.generation,
             )
             .await;
-
-        let complete_counter = &METRICS_REGISTRY
-            .metrics_group
-            .storage_controller_safkeeper_reconciles_complete;
-        complete_counter.inc(SafekeeperReconcilerLabelGroup {
-            sk_az: &req.safekeeper.skp.availability_zone_id,
-            sk_node_id: &req.safekeeper.get_id().to_string(),
-            sk_hostname: &req.safekeeper.skp.host,
-        });
-
         if let Err(err) = res {
             tracing::info!(
                 "couldn't remove reconciliation request onto {} from persistence: {err:?}",

@@ -287,17 +287,6 @@ def test_pgdata_import_smoke(
     with pytest.raises(psycopg2.errors.UndefinedTable):
         br_initdb_endpoint.safe_psql(f"select * from {workload.table}")

-    # The storage controller might be overly eager and attempt to finalize
-    # the import before the task got a chance to exit.
-    env.storage_controller.allowed_errors.extend(
-        [
-            ".*Call to node.*management API.*failed.*Import task still running.*",
-        ]
-    )
-
-    for ps in env.pageservers:
-        ps.allowed_errors.extend([".*Error processing HTTP request.*Import task not done yet.*"])
-

 @run_only_on_default_postgres(reason="PG version is irrelevant here")
 def test_import_completion_on_restart(

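For readers unfamiliar with the harness: allowed_errors is an allowlist of regexes matched against otherwise-fatal log lines when the test tears down. Roughly (a simplified sketch, not the fixtures' actual implementation):

    import re

    allowed_errors = [
        ".*Call to node.*management API.*failed.*Import task still running.*",
    ]

    def check_log(lines: list[str]) -> None:
        # Any ERROR line that matches no allowlisted pattern fails the test.
        for line in lines:
            if "ERROR" not in line:
                continue
            if not any(re.match(pattern, line) for pattern in allowed_errors):
                raise AssertionError(f"unexpected error in log: {line}")
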
@@ -482,17 +471,6 @@ def test_import_respects_timeline_lifecycle(
     else:
         raise RuntimeError(f"{action} param not recognized")

-    # The storage controller might be overly eager and attempt to finalize
-    # the import before the task got a chance to exit.
-    env.storage_controller.allowed_errors.extend(
-        [
-            ".*Call to node.*management API.*failed.*Import task still running.*",
-        ]
-    )
-
-    for ps in env.pageservers:
-        ps.allowed_errors.extend([".*Error processing HTTP request.*Import task not done yet.*"])
-

 @skip_in_debug_build("Validation query takes too long in debug builds")
 def test_import_chaos(

@@ -124,9 +124,6 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, make_httpserver,
             ".*downloading failed, possibly for shutdown",
             # {tenant_id=... timeline_id=...}:handle_pagerequests:handle_get_page_at_lsn_request{rel=1664/0/1260 blkno=0 req_lsn=0/149F0D8}: error reading relation or page version: Not found: will not become active. Current state: Stopping\n'
             ".*page_service.*will not become active.*",
-            # the following errors are possible when pageserver tries to ingest wal records despite being in unreadable state
-            ".*wal_connection_manager.*layer file download failed: No file found.*",
-            ".*wal_connection_manager.*could not ingest record.*",
         ]
     )

@@ -159,45 +156,6 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, make_httpserver,
         env.pageservers[2].id: ("Detached", None),
     }

-    # Track all the attached locations with mode and generation
-    history: list[tuple[int, str, int | None]] = []
-
-    def may_read(pageserver: NeonPageserver, mode: str, generation: int | None) -> bool:
-        # Rules for when a pageserver may read:
-        # - our generation is higher than any previous
-        # - our generation is equal to previous, but no other pageserver
-        #   in that generation has been AttachedSingle (i.e. allowed to compact/GC)
-        # - our generation is equal to previous, and the previous holder of this
-        #   generation was the same node as we're attaching now.
-        #
-        # If these conditions are not met, then a read _might_ work, but the pageserver might
-        # also hit errors trying to download layers.
-        highest_historic_generation = max([i[2] for i in history if i[2] is not None], default=None)
-
-        if generation is None:
-            # We're not in an attached state, we may not read
-            return False
-        elif highest_historic_generation is not None and generation < highest_historic_generation:
-            # We are in an outdated generation, we may not read
-            return False
-        elif highest_historic_generation is not None and generation == highest_historic_generation:
-            # We are re-using a generation: if any pageserver other than this one
-            # has held AttachedSingle mode, this node may not read (because some other
-            # node may be doing GC/compaction).
-            if any(
-                i[1] == "AttachedSingle"
-                and i[2] == highest_historic_generation
-                and i[0] != pageserver.id
-                for i in history
-            ):
-                log.info(
-                    f"Skipping read on {pageserver.id} because other pageserver has been in AttachedSingle mode in generation {highest_historic_generation}"
-                )
-                return False
-
-        # Fall through: we have passed conditions for readability
-        return True
-
     latest_attached = env.pageservers[0].id

     for _i in range(0, 64):

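To make the deleted rules concrete, a condensed re-statement of may_read with a worked example (hypothetical history values; the real function also takes the mode and logs its decision):

    history = [
        (1, "AttachedSingle", 2),  # node 1 held generation 2 and could compact/GC
        (2, "AttachedStale", 2),   # node 2 re-used generation 2 without AttachedSingle
    ]

    def may_read_condensed(node_id: int, generation: "int | None") -> bool:
        if generation is None:
            return False  # not attached at all
        highest = max((g for _, _, g in history if g is not None), default=None)
        if highest is not None and generation < highest:
            return False  # outdated generation
        if generation == highest and any(
            mode == "AttachedSingle" and g == highest and n != node_id
            for n, mode, g in history
        ):
            return False  # another node may be doing GC/compaction in this generation
        return True

    assert not may_read_condensed(2, 2)  # node 1 held AttachedSingle in generation 2
    assert may_read_condensed(1, 2)      # the node that held the generation may read
    assert not may_read_condensed(3, 1)  # below the highest generation seen
    assert not may_read_condensed(3, None)
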
@@ -241,10 +199,9 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, make_httpserver,
             assert len(tenants) == 1
             assert tenants[0]["generation"] == new_generation

-            if may_read(pageserver, last_state_ps[0], last_state_ps[1]):
-                log.info("Entering postgres...")
-                workload.churn_rows(rng.randint(128, 256), pageserver.id)
-                workload.validate(pageserver.id)
+            log.info("Entering postgres...")
+            workload.churn_rows(rng.randint(128, 256), pageserver.id)
+            workload.validate(pageserver.id)
         elif last_state_ps[0].startswith("Attached"):
             # The `storage_controller` will only re-attach on startup when a pageserver was the
             # holder of the latest generation: otherwise the pageserver will revert to detached

@@ -284,16 +241,18 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, make_httpserver,
             location_conf["generation"] = generation

         pageserver.tenant_location_configure(tenant_id, location_conf)

         last_state[pageserver.id] = (mode, generation)

-        may_read_this_generation = may_read(pageserver, mode, generation)
-        history.append((pageserver.id, mode, generation))
+        # It's only valid to connect to the last generation. Newer generations may yank layer
+        # files used in older generations.
+        last_generation = max(
+            [s[1] for s in last_state.values() if s[1] is not None], default=None
+        )

-        # This is a basic test: we are validating that the endpoint works properly _between_
-        # configuration changes. A stronger test would be to validate that clients see
-        # no errors while we are making the changes.
-        if may_read_this_generation:
+        if mode.startswith("Attached") and generation == last_generation:
+            # This is a basic test: we are validating that the endpoint works properly _between_
+            # configuration changes. A stronger test would be to validate that clients see
+            # no errors while we are making the changes.
             workload.churn_rows(
                 rng.randint(128, 256), pageserver.id, upload=mode != "AttachedStale"
             )

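A tiny worked example of the replacement guard, with made-up pageserver states (the real last_state maps each pageserver id to its most recent (mode, generation)):

    last_state = {
        1: ("AttachedStale", 5),
        2: ("AttachedSingle", 6),
        3: ("Detached", None),
    }
    last_generation = max([s[1] for s in last_state.values() if s[1] is not None], default=None)
    assert last_generation == 6

    # Only pageserver 2 passes the guard: attached AND holding the newest generation.
    readable = [
        ps_id
        for ps_id, (mode, generation) in last_state.items()
        if mode.startswith("Attached") and generation == last_generation
    ]
    assert readable == [2]
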
@@ -306,16 +265,9 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, make_httpserver,
             assert gc_summary["remote_storage_errors"] == 0
             assert gc_summary["indices_deleted"] > 0

-    # Attach all pageservers, in a higher generation than any previous. We will use the same
-    # gen for all, and AttachedMulti mode so that they do not interfere with one another.
-    generation = env.storage_controller.attach_hook_issue(tenant_id, env.pageservers[0].id)
+    # Attach all pageservers
     for ps in env.pageservers:
-        location_conf = {
-            "mode": "AttachedMulti",
-            "secondary_conf": None,
-            "tenant_conf": {},
-            "generation": generation,
-        }
+        location_conf = {"mode": "AttachedMulti", "secondary_conf": None, "tenant_conf": {}}
         ps.tenant_location_configure(tenant_id, location_conf)

     # Confirm that all are readable

@@ -1388,10 +1388,12 @@ def test_sharding_split_failures(
     with pytest.raises(failure.expect_exception()):
         env.storage_controller.tenant_shard_split(tenant_id, shard_count=4)

-    def assert_shard_count(shard_count: int, exclude_ps_id: int | None = None) -> None:
+    # We expect that the overall operation will fail, but some split requests
+    # will have succeeded: the net result should be to return to a clean state, including
+    # detaching any child shards.
+    def assert_rolled_back(exclude_ps_id=None) -> None:
         secondary_count = 0
         attached_count = 0
-        log.info(f"Iterating over {len(env.pageservers)} pageservers to check shard count")
         for ps in env.pageservers:
             if exclude_ps_id is not None and ps.id == exclude_ps_id:
                 continue

@@ -1402,23 +1404,35 @@ def test_sharding_split_failures(
             if tenant_shard_id.tenant_id != tenant_id:
                 continue  # skip bystanders
             log.info(f"Shard {tenant_shard_id} seen on node {ps.id} in mode {loc[1]['mode']}")
-            assert tenant_shard_id.shard_count == shard_count
+            assert tenant_shard_id.shard_count == initial_shard_count
             if loc[1]["mode"] == "Secondary":
                 secondary_count += 1
             else:
                 attached_count += 1

-        assert secondary_count == shard_count
-        assert attached_count == shard_count
-
-    # We expect that the overall operation will fail, but some split requests
-    # will have succeeded: the net result should be to return to a clean state, including
-    # detaching any child shards.
-    def assert_rolled_back(exclude_ps_id: int | None = None) -> None:
-        assert_shard_count(initial_shard_count, exclude_ps_id)
+        assert secondary_count == initial_shard_count
+        assert attached_count == initial_shard_count

     def assert_split_done(exclude_ps_id: int | None = None) -> None:
-        assert_shard_count(split_shard_count, exclude_ps_id)
+        secondary_count = 0
+        attached_count = 0
+        for ps in env.pageservers:
+            if exclude_ps_id is not None and ps.id == exclude_ps_id:
+                continue
+
+            locations = ps.http_client().tenant_list_locations()["tenant_shards"]
+            for loc in locations:
+                tenant_shard_id = TenantShardId.parse(loc[0])
+                if tenant_shard_id.tenant_id != tenant_id:
+                    continue  # skip bystanders
+                log.info(f"Shard {tenant_shard_id} seen on node {ps.id} in mode {loc[1]['mode']}")
+                assert tenant_shard_id.shard_count == split_shard_count
+                if loc[1]["mode"] == "Secondary":
+                    secondary_count += 1
+                else:
+                    attached_count += 1
+        assert attached_count == split_shard_count
+        assert secondary_count == split_shard_count

     def finish_split():
         # Having failed+rolled back, we should be able to split again

@@ -1454,7 +1468,6 @@ def test_sharding_split_failures(

     # The split should appear to be rolled back from the point of view of all pageservers
     # apart from the one that is offline
-    env.storage_controller.reconcile_until_idle()
     wait_until(lambda: assert_rolled_back(exclude_ps_id=failure.pageserver_id))

     finish_split()

@@ -1469,7 +1482,6 @@ def test_sharding_split_failures(
     log.info("Clearing failure...")
     failure.clear(env)

-    env.storage_controller.reconcile_until_idle()
     wait_until(assert_rolled_back)

     # Having rolled back, the tenant should be working
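
Both hunks lean on wait_until to absorb timing: it retries the assertion callback until it stops raising or a deadline passes. A simplified sketch of that helper (the real one lives in the test utilities and is configurable):

    import time

    def wait_until(check, timeout: float = 20.0, interval: float = 0.5):
        # Retry `check` until it returns without raising, or the deadline passes.
        deadline = time.monotonic() + timeout
        while True:
            try:
                return check()
            except Exception:
                if time.monotonic() >= deadline:
                    raise
                time.sleep(interval)

    # e.g. wait_until(assert_rolled_back) polls until the rollback is observable.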