mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-19 11:22:56 +00:00
Compare commits
80 Commits
release-pr
...
conrad/pro
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
01475c9e75 | ||
|
|
c835bbba1f | ||
|
|
f94dde4432 | ||
|
|
4991a85704 | ||
|
|
7838659197 | ||
|
|
3f1c542957 | ||
|
|
ec4072f845 | ||
|
|
56f867bde5 | ||
|
|
d1ab7471e2 | ||
|
|
6ff4175fd7 | ||
|
|
6331cb2161 | ||
|
|
71f38d1354 | ||
|
|
c0ba416967 | ||
|
|
13e8105740 | ||
|
|
db79304416 | ||
|
|
ffc9c33eb2 | ||
|
|
ed2d892113 | ||
|
|
131585eb6b | ||
|
|
0bab7e3086 | ||
|
|
e6cd5050fc | ||
|
|
60c0d19f57 | ||
|
|
dec2e2fb29 | ||
|
|
699a213c5d | ||
|
|
9a4157dadb | ||
|
|
bd52822e14 | ||
|
|
dcd016bbfc | ||
|
|
7b18e33997 | ||
|
|
9d75218ba7 | ||
|
|
1b3558df7a | ||
|
|
68205c48ed | ||
|
|
8d93d02c2f | ||
|
|
023821a80c | ||
|
|
944c1adc4c | ||
|
|
ca85f364ba | ||
|
|
9ef0662a42 | ||
|
|
3baef0bca3 | ||
|
|
f312c6571f | ||
|
|
27a42d0f96 | ||
|
|
b04ab468ee | ||
|
|
dcb629532b | ||
|
|
71d004289c | ||
|
|
4d422b937c | ||
|
|
bbe4dfa991 | ||
|
|
dcb24ce170 | ||
|
|
a2a942f93c | ||
|
|
cb10be710d | ||
|
|
15d01b257a | ||
|
|
aaee713e53 | ||
|
|
2e9207fdf3 | ||
|
|
d8ebd33fe6 | ||
|
|
2dc238e5b3 | ||
|
|
243bca1c49 | ||
|
|
fa909c27fc | ||
|
|
1b60571636 | ||
|
|
c18716bb3f | ||
|
|
cd1d2d1996 | ||
|
|
bd09369198 | ||
|
|
5330122049 | ||
|
|
45658ccccb | ||
|
|
14853a3284 | ||
|
|
aad809b048 | ||
|
|
fae8e7ba76 | ||
|
|
97a9abd181 | ||
|
|
4abc8e5282 | ||
|
|
aa4ec11af9 | ||
|
|
973a8d2680 | ||
|
|
c848f25ec2 | ||
|
|
d5624cc505 | ||
|
|
538e2312a6 | ||
|
|
a6073b5013 | ||
|
|
ea3798e3b3 | ||
|
|
1d642d6a57 | ||
|
|
3ffe6de0b9 | ||
|
|
42fb3c4d30 | ||
|
|
e04dd3be0b | ||
|
|
eb520a14ce | ||
|
|
eb5d832e6f | ||
|
|
70780e310c | ||
|
|
e82f7f0dfc | ||
|
|
8173dc600a |
4
Cargo.lock
generated
4
Cargo.lock
generated
@@ -133,9 +133,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.71"
|
||||
version = "1.0.94"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8"
|
||||
checksum = "c1fd03a028ef38ba2276dce7e33fcd6369c158a1bca17946c4b1b701891c1ff7"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
]
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
import 'sql_exporter/compute_backpressure_throttling_seconds.libsonnet',
|
||||
import 'sql_exporter/compute_current_lsn.libsonnet',
|
||||
import 'sql_exporter/compute_logical_snapshot_files.libsonnet',
|
||||
import 'sql_exporter/compute_logical_snapshots_bytes.libsonnet',
|
||||
import 'sql_exporter/compute_max_connections.libsonnet',
|
||||
import 'sql_exporter/compute_receive_lsn.libsonnet',
|
||||
import 'sql_exporter/compute_subscriptions_count.libsonnet',
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
SELECT
|
||||
(SELECT current_setting('neon.timeline_id')) AS timeline_id,
|
||||
-- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp.
|
||||
-- These temporary snapshot files are renamed to the actual snapshot files
|
||||
-- after they are completely built. We only WAL-log the completely built
|
||||
-- snapshot files
|
||||
(SELECT COALESCE(sum(size), 0) FROM pg_ls_logicalsnapdir() WHERE name LIKE '%.snap') AS logical_snapshots_bytes;
|
||||
@@ -0,0 +1,17 @@
|
||||
local neon = import 'neon.libsonnet';
|
||||
|
||||
local pg_ls_logicalsnapdir = importstr 'sql_exporter/compute_logical_snapshots_bytes.15.sql';
|
||||
local pg_ls_dir = importstr 'sql_exporter/compute_logical_snapshots_bytes.sql';
|
||||
|
||||
{
|
||||
metric_name: 'compute_logical_snapshots_bytes',
|
||||
type: 'gauge',
|
||||
help: 'Size of the pg_logical/snapshots directory, not including temporary files',
|
||||
key_labels: [
|
||||
'timeline_id',
|
||||
],
|
||||
values: [
|
||||
'logical_snapshots_bytes',
|
||||
],
|
||||
query: if neon.PG_MAJORVERSION_NUM < 15 then pg_ls_dir else pg_ls_logicalsnapdir,
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
SELECT
|
||||
(SELECT setting FROM pg_settings WHERE name = 'neon.timeline_id') AS timeline_id,
|
||||
-- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp.
|
||||
-- These temporary snapshot files are renamed to the actual snapshot files
|
||||
-- after they are completely built. We only WAL-log the completely built
|
||||
-- snapshot files
|
||||
(SELECT COALESCE(sum((pg_stat_file('pg_logical/snapshots/' || name, missing_ok => true)).size), 0)
|
||||
FROM (SELECT * FROM pg_ls_dir('pg_logical/snapshots') WHERE pg_ls_dir LIKE '%.snap') AS name
|
||||
) AS logical_snapshots_bytes;
|
||||
@@ -1243,12 +1243,7 @@ impl ComputeNode {
|
||||
let postgresql_conf_path = pgdata_path.join("postgresql.conf");
|
||||
config::write_postgres_conf(&postgresql_conf_path, &spec, self.http_port)?;
|
||||
|
||||
// TODO(ololobus): We need a concurrency during reconfiguration as well,
|
||||
// but DB is already running and used by user. We can easily get out of
|
||||
// `max_connections` limit, and the current code won't handle that.
|
||||
// let compute_state = self.state.lock().unwrap().clone();
|
||||
// let max_concurrent_connections = self.max_service_connections(&compute_state, &spec);
|
||||
let max_concurrent_connections = 1;
|
||||
let max_concurrent_connections = spec.reconfigure_concurrency;
|
||||
|
||||
// Temporarily reset max_cluster_size in config
|
||||
// to avoid the possibility of hitting the limit, while we are reconfiguring:
|
||||
|
||||
@@ -53,6 +53,7 @@ use compute_api::spec::Role;
|
||||
use nix::sys::signal::kill;
|
||||
use nix::sys::signal::Signal;
|
||||
use pageserver_api::shard::ShardStripeSize;
|
||||
use reqwest::header::CONTENT_TYPE;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use url::Host;
|
||||
use utils::id::{NodeId, TenantId, TimelineId};
|
||||
@@ -618,6 +619,7 @@ impl Endpoint {
|
||||
pgbouncer_settings: None,
|
||||
shard_stripe_size: Some(shard_stripe_size),
|
||||
local_proxy_config: None,
|
||||
reconfigure_concurrency: 1,
|
||||
};
|
||||
let spec_path = self.endpoint_path().join("spec.json");
|
||||
std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;
|
||||
@@ -817,6 +819,7 @@ impl Endpoint {
|
||||
self.http_address.ip(),
|
||||
self.http_address.port()
|
||||
))
|
||||
.header(CONTENT_TYPE.as_str(), "application/json")
|
||||
.body(format!(
|
||||
"{{\"spec\":{}}}",
|
||||
serde_json::to_string_pretty(&spec)?
|
||||
|
||||
@@ -19,6 +19,10 @@ pub type PgIdent = String;
|
||||
/// String type alias representing Postgres extension version
|
||||
pub type ExtVersion = String;
|
||||
|
||||
fn default_reconfigure_concurrency() -> usize {
|
||||
1
|
||||
}
|
||||
|
||||
/// Cluster spec or configuration represented as an optional number of
|
||||
/// delta operations + final cluster state description.
|
||||
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
|
||||
@@ -67,7 +71,7 @@ pub struct ComputeSpec {
|
||||
pub cluster: Cluster,
|
||||
pub delta_operations: Option<Vec<DeltaOp>>,
|
||||
|
||||
/// An optinal hint that can be passed to speed up startup time if we know
|
||||
/// An optional hint that can be passed to speed up startup time if we know
|
||||
/// that no pg catalog mutations (like role creation, database creation,
|
||||
/// extension creation) need to be done on the actual database to start.
|
||||
#[serde(default)] // Default false
|
||||
@@ -86,9 +90,7 @@ pub struct ComputeSpec {
|
||||
// etc. GUCs in cluster.settings. TODO: Once the control plane has been
|
||||
// updated to fill these fields, we can make these non optional.
|
||||
pub tenant_id: Option<TenantId>,
|
||||
|
||||
pub timeline_id: Option<TimelineId>,
|
||||
|
||||
pub pageserver_connstring: Option<String>,
|
||||
|
||||
#[serde(default)]
|
||||
@@ -113,6 +115,20 @@ pub struct ComputeSpec {
|
||||
/// Local Proxy configuration used for JWT authentication
|
||||
#[serde(default)]
|
||||
pub local_proxy_config: Option<LocalProxySpec>,
|
||||
|
||||
/// Number of concurrent connections during the parallel RunInEachDatabase
|
||||
/// phase of the apply config process.
|
||||
///
|
||||
/// We need a higher concurrency during reconfiguration in case of many DBs,
|
||||
/// but instance is already running and used by client. We can easily get out of
|
||||
/// `max_connections` limit, and the current code won't handle that.
|
||||
///
|
||||
/// Default is 1, but also allow control plane to override this value for specific
|
||||
/// projects. It's also recommended to bump `superuser_reserved_connections` +=
|
||||
/// `reconfigure_concurrency` for such projects to ensure that we always have
|
||||
/// enough spare connections for reconfiguration process to succeed.
|
||||
#[serde(default = "default_reconfigure_concurrency")]
|
||||
pub reconfigure_concurrency: usize,
|
||||
}
|
||||
|
||||
/// Feature flag to signal `compute_ctl` to enable certain experimental functionality.
|
||||
@@ -315,6 +331,9 @@ mod tests {
|
||||
|
||||
// Features list defaults to empty vector.
|
||||
assert!(spec.features.is_empty());
|
||||
|
||||
// Reconfigure concurrency defaults to 1.
|
||||
assert_eq!(spec.reconfigure_concurrency, 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -158,7 +158,8 @@ impl ShardIdentity {
|
||||
key_to_shard_number(self.count, self.stripe_size, key)
|
||||
}
|
||||
|
||||
/// Return true if the key should be ingested by this shard
|
||||
/// Return true if the key is stored only on this shard. This does not include
|
||||
/// global keys, see is_key_global().
|
||||
///
|
||||
/// Shards must ingest _at least_ keys which return true from this check.
|
||||
pub fn is_key_local(&self, key: &Key) -> bool {
|
||||
@@ -171,7 +172,7 @@ impl ShardIdentity {
|
||||
}
|
||||
|
||||
/// Return true if the key should be stored on all shards, not just one.
|
||||
fn is_key_global(&self, key: &Key) -> bool {
|
||||
pub fn is_key_global(&self, key: &Key) -> bool {
|
||||
if key.is_slru_block_key() || key.is_slru_segment_size_key() || key.is_aux_file_key() {
|
||||
// Special keys that are only stored on shard 0
|
||||
false
|
||||
|
||||
@@ -4,23 +4,18 @@ use crate::config::Host;
|
||||
use crate::config::SslMode;
|
||||
use crate::connection::{Request, RequestMessages};
|
||||
|
||||
use crate::query::RowStream;
|
||||
use crate::simple_query::SimpleQueryStream;
|
||||
|
||||
use crate::types::{Oid, ToSql, Type};
|
||||
use crate::types::{Oid, Type};
|
||||
|
||||
use crate::{
|
||||
prepare, query, simple_query, slice_iter, CancelToken, Error, ReadyForQueryStatus, Row,
|
||||
SimpleQueryMessage, Statement, ToStatement, Transaction, TransactionBuilder,
|
||||
simple_query, CancelToken, Error, ReadyForQueryStatus, Statement, Transaction,
|
||||
TransactionBuilder,
|
||||
};
|
||||
use bytes::BytesMut;
|
||||
use fallible_iterator::FallibleIterator;
|
||||
use futures_util::{future, ready, TryStreamExt};
|
||||
use parking_lot::Mutex;
|
||||
use futures_util::{future, ready};
|
||||
use postgres_protocol2::message::{backend::Message, frontend};
|
||||
use std::collections::HashMap;
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
use std::task::{Context, Poll};
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
@@ -55,7 +50,7 @@ impl Responses {
|
||||
/// A cache of type info and prepared statements for fetching type info
|
||||
/// (corresponding to the queries in the [prepare] module).
|
||||
#[derive(Default)]
|
||||
struct CachedTypeInfo {
|
||||
pub(crate) struct CachedTypeInfo {
|
||||
/// A statement for basic information for a type from its
|
||||
/// OID. Corresponds to [TYPEINFO_QUERY](prepare::TYPEINFO_QUERY) (or its
|
||||
/// fallback).
|
||||
@@ -71,13 +66,45 @@ struct CachedTypeInfo {
|
||||
/// Cache of types already looked up.
|
||||
types: HashMap<Oid, Type>,
|
||||
}
|
||||
impl CachedTypeInfo {
|
||||
pub(crate) fn typeinfo(&mut self) -> Option<&Statement> {
|
||||
self.typeinfo.as_ref()
|
||||
}
|
||||
|
||||
pub(crate) fn set_typeinfo(&mut self, statement: Statement) -> &Statement {
|
||||
self.typeinfo.insert(statement)
|
||||
}
|
||||
|
||||
pub(crate) fn typeinfo_composite(&mut self) -> Option<&Statement> {
|
||||
self.typeinfo_composite.as_ref()
|
||||
}
|
||||
|
||||
pub(crate) fn set_typeinfo_composite(&mut self, statement: Statement) -> &Statement {
|
||||
self.typeinfo_composite.insert(statement)
|
||||
}
|
||||
|
||||
pub(crate) fn typeinfo_enum(&mut self) -> Option<&Statement> {
|
||||
self.typeinfo_enum.as_ref()
|
||||
}
|
||||
|
||||
pub(crate) fn set_typeinfo_enum(&mut self, statement: Statement) -> &Statement {
|
||||
self.typeinfo_enum.insert(statement)
|
||||
}
|
||||
|
||||
pub(crate) fn type_(&mut self, oid: Oid) -> Option<Type> {
|
||||
self.types.get(&oid).cloned()
|
||||
}
|
||||
|
||||
pub(crate) fn set_type(&mut self, oid: Oid, type_: &Type) {
|
||||
self.types.insert(oid, type_.clone());
|
||||
}
|
||||
}
|
||||
|
||||
pub struct InnerClient {
|
||||
sender: mpsc::UnboundedSender<Request>,
|
||||
cached_typeinfo: Mutex<CachedTypeInfo>,
|
||||
|
||||
/// A buffer to use when writing out postgres commands.
|
||||
buffer: Mutex<BytesMut>,
|
||||
buffer: BytesMut,
|
||||
}
|
||||
|
||||
impl InnerClient {
|
||||
@@ -92,47 +119,14 @@ impl InnerClient {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn typeinfo(&self) -> Option<Statement> {
|
||||
self.cached_typeinfo.lock().typeinfo.clone()
|
||||
}
|
||||
|
||||
pub fn set_typeinfo(&self, statement: &Statement) {
|
||||
self.cached_typeinfo.lock().typeinfo = Some(statement.clone());
|
||||
}
|
||||
|
||||
pub fn typeinfo_composite(&self) -> Option<Statement> {
|
||||
self.cached_typeinfo.lock().typeinfo_composite.clone()
|
||||
}
|
||||
|
||||
pub fn set_typeinfo_composite(&self, statement: &Statement) {
|
||||
self.cached_typeinfo.lock().typeinfo_composite = Some(statement.clone());
|
||||
}
|
||||
|
||||
pub fn typeinfo_enum(&self) -> Option<Statement> {
|
||||
self.cached_typeinfo.lock().typeinfo_enum.clone()
|
||||
}
|
||||
|
||||
pub fn set_typeinfo_enum(&self, statement: &Statement) {
|
||||
self.cached_typeinfo.lock().typeinfo_enum = Some(statement.clone());
|
||||
}
|
||||
|
||||
pub fn type_(&self, oid: Oid) -> Option<Type> {
|
||||
self.cached_typeinfo.lock().types.get(&oid).cloned()
|
||||
}
|
||||
|
||||
pub fn set_type(&self, oid: Oid, type_: &Type) {
|
||||
self.cached_typeinfo.lock().types.insert(oid, type_.clone());
|
||||
}
|
||||
|
||||
/// Call the given function with a buffer to be used when writing out
|
||||
/// postgres commands.
|
||||
pub fn with_buf<F, R>(&self, f: F) -> R
|
||||
pub fn with_buf<F, R>(&mut self, f: F) -> R
|
||||
where
|
||||
F: FnOnce(&mut BytesMut) -> R,
|
||||
{
|
||||
let mut buffer = self.buffer.lock();
|
||||
let r = f(&mut buffer);
|
||||
buffer.clear();
|
||||
let r = f(&mut self.buffer);
|
||||
self.buffer.clear();
|
||||
r
|
||||
}
|
||||
}
|
||||
@@ -150,7 +144,8 @@ pub struct SocketConfig {
|
||||
/// The client is one half of what is returned when a connection is established. Users interact with the database
|
||||
/// through this client object.
|
||||
pub struct Client {
|
||||
inner: Arc<InnerClient>,
|
||||
pub(crate) inner: InnerClient,
|
||||
pub(crate) cached_typeinfo: CachedTypeInfo,
|
||||
|
||||
socket_config: SocketConfig,
|
||||
ssl_mode: SslMode,
|
||||
@@ -167,11 +162,11 @@ impl Client {
|
||||
secret_key: i32,
|
||||
) -> Client {
|
||||
Client {
|
||||
inner: Arc::new(InnerClient {
|
||||
inner: InnerClient {
|
||||
sender,
|
||||
cached_typeinfo: Default::default(),
|
||||
buffer: Default::default(),
|
||||
}),
|
||||
},
|
||||
cached_typeinfo: Default::default(),
|
||||
|
||||
socket_config,
|
||||
ssl_mode,
|
||||
@@ -185,161 +180,6 @@ impl Client {
|
||||
self.process_id
|
||||
}
|
||||
|
||||
pub(crate) fn inner(&self) -> &Arc<InnerClient> {
|
||||
&self.inner
|
||||
}
|
||||
|
||||
/// Creates a new prepared statement.
|
||||
///
|
||||
/// Prepared statements can be executed repeatedly, and may contain query parameters (indicated by `$1`, `$2`, etc),
|
||||
/// which are set when executed. Prepared statements can only be used with the connection that created them.
|
||||
pub async fn prepare(&self, query: &str) -> Result<Statement, Error> {
|
||||
self.prepare_typed(query, &[]).await
|
||||
}
|
||||
|
||||
/// Like `prepare`, but allows the types of query parameters to be explicitly specified.
|
||||
///
|
||||
/// The list of types may be smaller than the number of parameters - the types of the remaining parameters will be
|
||||
/// inferred. For example, `client.prepare_typed(query, &[])` is equivalent to `client.prepare(query)`.
|
||||
pub async fn prepare_typed(
|
||||
&self,
|
||||
query: &str,
|
||||
parameter_types: &[Type],
|
||||
) -> Result<Statement, Error> {
|
||||
prepare::prepare(&self.inner, query, parameter_types).await
|
||||
}
|
||||
|
||||
/// Executes a statement, returning a vector of the resulting rows.
|
||||
///
|
||||
/// A statement may contain parameters, specified by `$n`, where `n` is the index of the parameter of the list
|
||||
/// provided, 1-indexed.
|
||||
///
|
||||
/// The `statement` argument can either be a `Statement`, or a raw query string. If the same statement will be
|
||||
/// repeatedly executed (perhaps with different query parameters), consider preparing the statement up front
|
||||
/// with the `prepare` method.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the number of parameters provided does not match the number expected.
|
||||
pub async fn query<T>(
|
||||
&self,
|
||||
statement: &T,
|
||||
params: &[&(dyn ToSql + Sync)],
|
||||
) -> Result<Vec<Row>, Error>
|
||||
where
|
||||
T: ?Sized + ToStatement,
|
||||
{
|
||||
self.query_raw(statement, slice_iter(params))
|
||||
.await?
|
||||
.try_collect()
|
||||
.await
|
||||
}
|
||||
|
||||
/// The maximally flexible version of [`query`].
|
||||
///
|
||||
/// A statement may contain parameters, specified by `$n`, where `n` is the index of the parameter of the list
|
||||
/// provided, 1-indexed.
|
||||
///
|
||||
/// The `statement` argument can either be a `Statement`, or a raw query string. If the same statement will be
|
||||
/// repeatedly executed (perhaps with different query parameters), consider preparing the statement up front
|
||||
/// with the `prepare` method.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the number of parameters provided does not match the number expected.
|
||||
///
|
||||
/// [`query`]: #method.query
|
||||
pub async fn query_raw<'a, T, I>(&self, statement: &T, params: I) -> Result<RowStream, Error>
|
||||
where
|
||||
T: ?Sized + ToStatement,
|
||||
I: IntoIterator<Item = &'a (dyn ToSql + Sync)>,
|
||||
I::IntoIter: ExactSizeIterator,
|
||||
{
|
||||
let statement = statement.__convert().into_statement(self).await?;
|
||||
query::query(&self.inner, statement, params).await
|
||||
}
|
||||
|
||||
/// Pass text directly to the Postgres backend to allow it to sort out typing itself and
|
||||
/// to save a roundtrip
|
||||
pub async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
|
||||
where
|
||||
S: AsRef<str>,
|
||||
I: IntoIterator<Item = Option<S>>,
|
||||
I::IntoIter: ExactSizeIterator,
|
||||
{
|
||||
query::query_txt(&self.inner, statement, params).await
|
||||
}
|
||||
|
||||
/// Executes a statement, returning the number of rows modified.
|
||||
///
|
||||
/// A statement may contain parameters, specified by `$n`, where `n` is the index of the parameter of the list
|
||||
/// provided, 1-indexed.
|
||||
///
|
||||
/// The `statement` argument can either be a `Statement`, or a raw query string. If the same statement will be
|
||||
/// repeatedly executed (perhaps with different query parameters), consider preparing the statement up front
|
||||
/// with the `prepare` method.
|
||||
///
|
||||
/// If the statement does not modify any rows (e.g. `SELECT`), 0 is returned.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the number of parameters provided does not match the number expected.
|
||||
pub async fn execute<T>(
|
||||
&self,
|
||||
statement: &T,
|
||||
params: &[&(dyn ToSql + Sync)],
|
||||
) -> Result<u64, Error>
|
||||
where
|
||||
T: ?Sized + ToStatement,
|
||||
{
|
||||
self.execute_raw(statement, slice_iter(params)).await
|
||||
}
|
||||
|
||||
/// The maximally flexible version of [`execute`].
|
||||
///
|
||||
/// A statement may contain parameters, specified by `$n`, where `n` is the index of the parameter of the list
|
||||
/// provided, 1-indexed.
|
||||
///
|
||||
/// The `statement` argument can either be a `Statement`, or a raw query string. If the same statement will be
|
||||
/// repeatedly executed (perhaps with different query parameters), consider preparing the statement up front
|
||||
/// with the `prepare` method.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the number of parameters provided does not match the number expected.
|
||||
///
|
||||
/// [`execute`]: #method.execute
|
||||
pub async fn execute_raw<'a, T, I>(&self, statement: &T, params: I) -> Result<u64, Error>
|
||||
where
|
||||
T: ?Sized + ToStatement,
|
||||
I: IntoIterator<Item = &'a (dyn ToSql + Sync)>,
|
||||
I::IntoIter: ExactSizeIterator,
|
||||
{
|
||||
let statement = statement.__convert().into_statement(self).await?;
|
||||
query::execute(self.inner(), statement, params).await
|
||||
}
|
||||
|
||||
/// Executes a sequence of SQL statements using the simple query protocol, returning the resulting rows.
|
||||
///
|
||||
/// Statements should be separated by semicolons. If an error occurs, execution of the sequence will stop at that
|
||||
/// point. The simple query protocol returns the values in rows as strings rather than in their binary encodings,
|
||||
/// so the associated row type doesn't work with the `FromSql` trait. Rather than simply returning a list of the
|
||||
/// rows, this method returns a list of an enum which indicates either the completion of one of the commands,
|
||||
/// or a row of data. This preserves the framing between the separate statements in the request.
|
||||
///
|
||||
/// # Warning
|
||||
///
|
||||
/// Prepared statements should be use for any query which contains user-specified data, as they provided the
|
||||
/// functionality to safely embed that data in the request. Do not form statements via string concatenation and pass
|
||||
/// them to this method!
|
||||
pub async fn simple_query(&self, query: &str) -> Result<Vec<SimpleQueryMessage>, Error> {
|
||||
self.simple_query_raw(query).await?.try_collect().await
|
||||
}
|
||||
|
||||
pub(crate) async fn simple_query_raw(&self, query: &str) -> Result<SimpleQueryStream, Error> {
|
||||
simple_query::simple_query(self.inner(), query).await
|
||||
}
|
||||
|
||||
/// Executes a sequence of SQL statements using the simple query protocol.
|
||||
///
|
||||
/// Statements should be separated by semicolons. If an error occurs, execution of the sequence will stop at that
|
||||
@@ -350,8 +190,8 @@ impl Client {
|
||||
/// Prepared statements should be use for any query which contains user-specified data, as they provided the
|
||||
/// functionality to safely embed that data in the request. Do not form statements via string concatenation and pass
|
||||
/// them to this method!
|
||||
pub async fn batch_execute(&self, query: &str) -> Result<ReadyForQueryStatus, Error> {
|
||||
simple_query::batch_execute(self.inner(), query).await
|
||||
pub async fn batch_execute(&mut self, query: &str) -> Result<ReadyForQueryStatus, Error> {
|
||||
simple_query::batch_execute(&mut self.inner, query).await
|
||||
}
|
||||
|
||||
/// Begins a new database transaction.
|
||||
@@ -359,7 +199,7 @@ impl Client {
|
||||
/// The transaction will roll back by default - use the `commit` method to commit it.
|
||||
pub async fn transaction(&mut self) -> Result<Transaction<'_>, Error> {
|
||||
struct RollbackIfNotDone<'me> {
|
||||
client: &'me Client,
|
||||
client: &'me mut Client,
|
||||
done: bool,
|
||||
}
|
||||
|
||||
@@ -369,13 +209,13 @@ impl Client {
|
||||
return;
|
||||
}
|
||||
|
||||
let buf = self.client.inner().with_buf(|buf| {
|
||||
let buf = self.client.inner.with_buf(|buf| {
|
||||
frontend::query("ROLLBACK", buf).unwrap();
|
||||
buf.split().freeze()
|
||||
});
|
||||
let _ = self
|
||||
.client
|
||||
.inner()
|
||||
.inner
|
||||
.send(RequestMessages::Single(FrontendMessage::Raw(buf)));
|
||||
}
|
||||
}
|
||||
@@ -390,7 +230,7 @@ impl Client {
|
||||
client: self,
|
||||
done: false,
|
||||
};
|
||||
self.batch_execute("BEGIN").await?;
|
||||
cleaner.client.batch_execute("BEGIN").await?;
|
||||
cleaner.done = true;
|
||||
}
|
||||
|
||||
@@ -416,11 +256,6 @@ impl Client {
|
||||
}
|
||||
}
|
||||
|
||||
/// Query for type information
|
||||
pub async fn get_type(&self, oid: Oid) -> Result<Type, Error> {
|
||||
crate::prepare::get_type(&self.inner, oid).await
|
||||
}
|
||||
|
||||
/// Determines if the connection to the server has already closed.
|
||||
///
|
||||
/// In that case, all future queries will fail.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::query::RowStream;
|
||||
use crate::query::{self, RowStream};
|
||||
use crate::types::Type;
|
||||
use crate::{Client, Error, Transaction};
|
||||
use async_trait::async_trait;
|
||||
@@ -13,33 +13,32 @@ mod private {
|
||||
/// This trait is "sealed", and cannot be implemented outside of this crate.
|
||||
#[async_trait]
|
||||
pub trait GenericClient: private::Sealed {
|
||||
/// Like `Client::query_raw_txt`.
|
||||
async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
|
||||
async fn query_raw_txt<S, I>(&mut self, statement: &str, params: I) -> Result<RowStream, Error>
|
||||
where
|
||||
S: AsRef<str> + Sync + Send,
|
||||
I: IntoIterator<Item = Option<S>> + Sync + Send,
|
||||
I::IntoIter: ExactSizeIterator + Sync + Send;
|
||||
|
||||
/// Query for type information
|
||||
async fn get_type(&self, oid: Oid) -> Result<Type, Error>;
|
||||
async fn get_type(&mut self, oid: Oid) -> Result<Type, Error>;
|
||||
}
|
||||
|
||||
impl private::Sealed for Client {}
|
||||
|
||||
#[async_trait]
|
||||
impl GenericClient for Client {
|
||||
async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
|
||||
async fn query_raw_txt<S, I>(&mut self, statement: &str, params: I) -> Result<RowStream, Error>
|
||||
where
|
||||
S: AsRef<str> + Sync + Send,
|
||||
I: IntoIterator<Item = Option<S>> + Sync + Send,
|
||||
I::IntoIter: ExactSizeIterator + Sync + Send,
|
||||
{
|
||||
self.query_raw_txt(statement, params).await
|
||||
query::query_txt(&mut self.inner, statement, params).await
|
||||
}
|
||||
|
||||
/// Query for type information
|
||||
async fn get_type(&self, oid: Oid) -> Result<Type, Error> {
|
||||
self.get_type(oid).await
|
||||
async fn get_type(&mut self, oid: Oid) -> Result<Type, Error> {
|
||||
crate::prepare::get_type(&mut self.inner, &mut self.cached_typeinfo, oid).await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,17 +47,18 @@ impl private::Sealed for Transaction<'_> {}
|
||||
#[async_trait]
|
||||
#[allow(clippy::needless_lifetimes)]
|
||||
impl GenericClient for Transaction<'_> {
|
||||
async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
|
||||
async fn query_raw_txt<S, I>(&mut self, statement: &str, params: I) -> Result<RowStream, Error>
|
||||
where
|
||||
S: AsRef<str> + Sync + Send,
|
||||
I: IntoIterator<Item = Option<S>> + Sync + Send,
|
||||
I::IntoIter: ExactSizeIterator + Sync + Send,
|
||||
{
|
||||
self.query_raw_txt(statement, params).await
|
||||
query::query_txt(&mut self.client().inner, statement, params).await
|
||||
}
|
||||
|
||||
/// Query for type information
|
||||
async fn get_type(&self, oid: Oid) -> Result<Type, Error> {
|
||||
self.client().get_type(oid).await
|
||||
async fn get_type(&mut self, oid: Oid) -> Result<Type, Error> {
|
||||
let client = self.client();
|
||||
crate::prepare::get_type(&mut client.inner, &mut client.cached_typeinfo, oid).await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,11 +10,10 @@ use crate::error::DbError;
|
||||
pub use crate::error::Error;
|
||||
pub use crate::generic_client::GenericClient;
|
||||
pub use crate::query::RowStream;
|
||||
pub use crate::row::{Row, SimpleQueryRow};
|
||||
pub use crate::simple_query::SimpleQueryStream;
|
||||
pub use crate::row::Row;
|
||||
pub use crate::statement::{Column, Statement};
|
||||
pub use crate::tls::NoTls;
|
||||
pub use crate::to_statement::ToStatement;
|
||||
// pub use crate::to_statement::ToStatement;
|
||||
pub use crate::transaction::Transaction;
|
||||
pub use crate::transaction_builder::{IsolationLevel, TransactionBuilder};
|
||||
use crate::types::ToSql;
|
||||
@@ -65,7 +64,7 @@ pub mod row;
|
||||
mod simple_query;
|
||||
mod statement;
|
||||
pub mod tls;
|
||||
mod to_statement;
|
||||
// mod to_statement;
|
||||
mod transaction;
|
||||
mod transaction_builder;
|
||||
pub mod types;
|
||||
@@ -98,7 +97,6 @@ impl Notification {
|
||||
/// An asynchronous message from the server.
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
#[derive(Debug, Clone)]
|
||||
#[non_exhaustive]
|
||||
pub enum AsyncMessage {
|
||||
/// A notice.
|
||||
///
|
||||
@@ -110,18 +108,6 @@ pub enum AsyncMessage {
|
||||
Notification(Notification),
|
||||
}
|
||||
|
||||
/// Message returned by the `SimpleQuery` stream.
|
||||
#[derive(Debug)]
|
||||
#[non_exhaustive]
|
||||
pub enum SimpleQueryMessage {
|
||||
/// A row of data.
|
||||
Row(SimpleQueryRow),
|
||||
/// A statement in the query has completed.
|
||||
///
|
||||
/// The number of rows modified or selected is returned.
|
||||
CommandComplete(u64),
|
||||
}
|
||||
|
||||
fn slice_iter<'a>(
|
||||
s: &'a [&'a (dyn ToSql + Sync)],
|
||||
) -> impl ExactSizeIterator<Item = &'a (dyn ToSql + Sync)> + 'a {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::client::InnerClient;
|
||||
use crate::client::{CachedTypeInfo, InnerClient};
|
||||
use crate::codec::FrontendMessage;
|
||||
use crate::connection::RequestMessages;
|
||||
use crate::error::SqlState;
|
||||
@@ -7,14 +7,13 @@ use crate::{query, slice_iter};
|
||||
use crate::{Column, Error, Statement};
|
||||
use bytes::Bytes;
|
||||
use fallible_iterator::FallibleIterator;
|
||||
use futures_util::{pin_mut, TryStreamExt};
|
||||
use futures_util::{pin_mut, StreamExt, TryStreamExt};
|
||||
use log::debug;
|
||||
use postgres_protocol2::message::backend::Message;
|
||||
use postgres_protocol2::message::frontend;
|
||||
use std::future::Future;
|
||||
use std::pin::Pin;
|
||||
use std::pin::{pin, Pin};
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
pub(crate) const TYPEINFO_QUERY: &str = "\
|
||||
SELECT t.typname, t.typtype, t.typelem, r.rngsubtype, t.typbasetype, n.nspname, t.typrelid
|
||||
@@ -59,7 +58,8 @@ ORDER BY attnum
|
||||
static NEXT_ID: AtomicUsize = AtomicUsize::new(0);
|
||||
|
||||
pub async fn prepare(
|
||||
client: &Arc<InnerClient>,
|
||||
client: &mut InnerClient,
|
||||
cache: &mut CachedTypeInfo,
|
||||
query: &str,
|
||||
types: &[Type],
|
||||
) -> Result<Statement, Error> {
|
||||
@@ -86,7 +86,7 @@ pub async fn prepare(
|
||||
let mut parameters = vec![];
|
||||
let mut it = parameter_description.parameters();
|
||||
while let Some(oid) = it.next().map_err(Error::parse)? {
|
||||
let type_ = get_type(client, oid).await?;
|
||||
let type_ = get_type(client, cache, oid).await?;
|
||||
parameters.push(type_);
|
||||
}
|
||||
|
||||
@@ -94,24 +94,30 @@ pub async fn prepare(
|
||||
if let Some(row_description) = row_description {
|
||||
let mut it = row_description.fields();
|
||||
while let Some(field) = it.next().map_err(Error::parse)? {
|
||||
let type_ = get_type(client, field.type_oid()).await?;
|
||||
let type_ = get_type(client, cache, field.type_oid()).await?;
|
||||
let column = Column::new(field.name().to_string(), type_, field);
|
||||
columns.push(column);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Statement::new(client, name, parameters, columns))
|
||||
Ok(Statement::new(name, parameters, columns))
|
||||
}
|
||||
|
||||
fn prepare_rec<'a>(
|
||||
client: &'a Arc<InnerClient>,
|
||||
client: &'a mut InnerClient,
|
||||
cache: &'a mut CachedTypeInfo,
|
||||
query: &'a str,
|
||||
types: &'a [Type],
|
||||
) -> Pin<Box<dyn Future<Output = Result<Statement, Error>> + 'a + Send>> {
|
||||
Box::pin(prepare(client, query, types))
|
||||
Box::pin(prepare(client, cache, query, types))
|
||||
}
|
||||
|
||||
fn encode(client: &InnerClient, name: &str, query: &str, types: &[Type]) -> Result<Bytes, Error> {
|
||||
fn encode(
|
||||
client: &mut InnerClient,
|
||||
name: &str,
|
||||
query: &str,
|
||||
types: &[Type],
|
||||
) -> Result<Bytes, Error> {
|
||||
if types.is_empty() {
|
||||
debug!("preparing query {}: {}", name, query);
|
||||
} else {
|
||||
@@ -126,16 +132,20 @@ fn encode(client: &InnerClient, name: &str, query: &str, types: &[Type]) -> Resu
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn get_type(client: &Arc<InnerClient>, oid: Oid) -> Result<Type, Error> {
|
||||
pub async fn get_type(
|
||||
client: &mut InnerClient,
|
||||
cache: &mut CachedTypeInfo,
|
||||
oid: Oid,
|
||||
) -> Result<Type, Error> {
|
||||
if let Some(type_) = Type::from_oid(oid) {
|
||||
return Ok(type_);
|
||||
}
|
||||
|
||||
if let Some(type_) = client.type_(oid) {
|
||||
if let Some(type_) = cache.type_(oid) {
|
||||
return Ok(type_);
|
||||
}
|
||||
|
||||
let stmt = typeinfo_statement(client).await?;
|
||||
let stmt = typeinfo_statement(client, cache).await?;
|
||||
|
||||
let rows = query::query(client, stmt, slice_iter(&[&oid])).await?;
|
||||
pin_mut!(rows);
|
||||
@@ -145,118 +155,141 @@ pub async fn get_type(client: &Arc<InnerClient>, oid: Oid) -> Result<Type, Error
|
||||
None => return Err(Error::unexpected_message()),
|
||||
};
|
||||
|
||||
let name: String = row.try_get(0)?;
|
||||
let type_: i8 = row.try_get(1)?;
|
||||
let elem_oid: Oid = row.try_get(2)?;
|
||||
let rngsubtype: Option<Oid> = row.try_get(3)?;
|
||||
let basetype: Oid = row.try_get(4)?;
|
||||
let schema: String = row.try_get(5)?;
|
||||
let relid: Oid = row.try_get(6)?;
|
||||
let name: String = row.try_get(stmt.columns(), 0)?;
|
||||
let type_: i8 = row.try_get(stmt.columns(), 1)?;
|
||||
let elem_oid: Oid = row.try_get(stmt.columns(), 2)?;
|
||||
let rngsubtype: Option<Oid> = row.try_get(stmt.columns(), 3)?;
|
||||
let basetype: Oid = row.try_get(stmt.columns(), 4)?;
|
||||
let schema: String = row.try_get(stmt.columns(), 5)?;
|
||||
let relid: Oid = row.try_get(stmt.columns(), 6)?;
|
||||
|
||||
let kind = if type_ == b'e' as i8 {
|
||||
let variants = get_enum_variants(client, oid).await?;
|
||||
let variants = get_enum_variants(client, cache, oid).await?;
|
||||
Kind::Enum(variants)
|
||||
} else if type_ == b'p' as i8 {
|
||||
Kind::Pseudo
|
||||
} else if basetype != 0 {
|
||||
let type_ = get_type_rec(client, basetype).await?;
|
||||
let type_ = get_type_rec(client, cache, basetype).await?;
|
||||
Kind::Domain(type_)
|
||||
} else if elem_oid != 0 {
|
||||
let type_ = get_type_rec(client, elem_oid).await?;
|
||||
let type_ = get_type_rec(client, cache, elem_oid).await?;
|
||||
Kind::Array(type_)
|
||||
} else if relid != 0 {
|
||||
let fields = get_composite_fields(client, relid).await?;
|
||||
let fields = get_composite_fields(client, cache, relid).await?;
|
||||
Kind::Composite(fields)
|
||||
} else if let Some(rngsubtype) = rngsubtype {
|
||||
let type_ = get_type_rec(client, rngsubtype).await?;
|
||||
let type_ = get_type_rec(client, cache, rngsubtype).await?;
|
||||
Kind::Range(type_)
|
||||
} else {
|
||||
Kind::Simple
|
||||
};
|
||||
|
||||
let type_ = Type::new(name, oid, kind, schema);
|
||||
client.set_type(oid, &type_);
|
||||
cache.set_type(oid, &type_);
|
||||
|
||||
Ok(type_)
|
||||
}
|
||||
|
||||
fn get_type_rec<'a>(
|
||||
client: &'a Arc<InnerClient>,
|
||||
client: &'a mut InnerClient,
|
||||
cache: &'a mut CachedTypeInfo,
|
||||
oid: Oid,
|
||||
) -> Pin<Box<dyn Future<Output = Result<Type, Error>> + Send + 'a>> {
|
||||
Box::pin(get_type(client, oid))
|
||||
Box::pin(get_type(client, cache, oid))
|
||||
}
|
||||
|
||||
async fn typeinfo_statement(client: &Arc<InnerClient>) -> Result<Statement, Error> {
|
||||
if let Some(stmt) = client.typeinfo() {
|
||||
return Ok(stmt);
|
||||
async fn typeinfo_statement<'c>(
|
||||
client: &mut InnerClient,
|
||||
cache: &'c mut CachedTypeInfo,
|
||||
) -> Result<&'c Statement, Error> {
|
||||
if cache.typeinfo().is_some() {
|
||||
// needed to get around a borrow checker limitation
|
||||
return Ok(cache.typeinfo().unwrap());
|
||||
}
|
||||
|
||||
let stmt = match prepare_rec(client, TYPEINFO_QUERY, &[]).await {
|
||||
let stmt = match prepare_rec(client, cache, TYPEINFO_QUERY, &[]).await {
|
||||
Ok(stmt) => stmt,
|
||||
Err(ref e) if e.code() == Some(&SqlState::UNDEFINED_TABLE) => {
|
||||
prepare_rec(client, TYPEINFO_FALLBACK_QUERY, &[]).await?
|
||||
prepare_rec(client, cache, TYPEINFO_FALLBACK_QUERY, &[]).await?
|
||||
}
|
||||
Err(e) => return Err(e),
|
||||
};
|
||||
|
||||
client.set_typeinfo(&stmt);
|
||||
Ok(stmt)
|
||||
Ok(cache.set_typeinfo(stmt))
|
||||
}
|
||||
|
||||
async fn get_enum_variants(client: &Arc<InnerClient>, oid: Oid) -> Result<Vec<String>, Error> {
|
||||
let stmt = typeinfo_enum_statement(client).await?;
|
||||
async fn get_enum_variants(
|
||||
client: &mut InnerClient,
|
||||
cache: &mut CachedTypeInfo,
|
||||
oid: Oid,
|
||||
) -> Result<Vec<String>, Error> {
|
||||
let stmt = typeinfo_enum_statement(client, cache).await?;
|
||||
|
||||
query::query(client, stmt, slice_iter(&[&oid]))
|
||||
.await?
|
||||
.and_then(|row| async move { row.try_get(0) })
|
||||
.try_collect()
|
||||
.await
|
||||
let mut out = vec![];
|
||||
|
||||
let mut rows = pin!(query::query(client, stmt, slice_iter(&[&oid])).await?);
|
||||
while let Some(row) = rows.next().await {
|
||||
out.push(row?.try_get(stmt.columns(), 0)?)
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
async fn typeinfo_enum_statement(client: &Arc<InnerClient>) -> Result<Statement, Error> {
|
||||
if let Some(stmt) = client.typeinfo_enum() {
|
||||
return Ok(stmt);
|
||||
async fn typeinfo_enum_statement<'c>(
|
||||
client: &mut InnerClient,
|
||||
cache: &'c mut CachedTypeInfo,
|
||||
) -> Result<&'c Statement, Error> {
|
||||
if cache.typeinfo_enum().is_some() {
|
||||
// needed to get around a borrow checker limitation
|
||||
return Ok(cache.typeinfo_enum().unwrap());
|
||||
}
|
||||
|
||||
let stmt = match prepare_rec(client, TYPEINFO_ENUM_QUERY, &[]).await {
|
||||
let stmt = match prepare_rec(client, cache, TYPEINFO_ENUM_QUERY, &[]).await {
|
||||
Ok(stmt) => stmt,
|
||||
Err(ref e) if e.code() == Some(&SqlState::UNDEFINED_COLUMN) => {
|
||||
prepare_rec(client, TYPEINFO_ENUM_FALLBACK_QUERY, &[]).await?
|
||||
prepare_rec(client, cache, TYPEINFO_ENUM_FALLBACK_QUERY, &[]).await?
|
||||
}
|
||||
Err(e) => return Err(e),
|
||||
};
|
||||
|
||||
client.set_typeinfo_enum(&stmt);
|
||||
Ok(stmt)
|
||||
Ok(cache.set_typeinfo_enum(stmt))
|
||||
}
|
||||
|
||||
async fn get_composite_fields(client: &Arc<InnerClient>, oid: Oid) -> Result<Vec<Field>, Error> {
|
||||
let stmt = typeinfo_composite_statement(client).await?;
|
||||
async fn get_composite_fields(
|
||||
client: &mut InnerClient,
|
||||
cache: &mut CachedTypeInfo,
|
||||
oid: Oid,
|
||||
) -> Result<Vec<Field>, Error> {
|
||||
let stmt = typeinfo_composite_statement(client, cache).await?;
|
||||
|
||||
let rows = query::query(client, stmt, slice_iter(&[&oid]))
|
||||
.await?
|
||||
.try_collect::<Vec<_>>()
|
||||
.await?;
|
||||
let mut rows = pin!(query::query(client, stmt, slice_iter(&[&oid])).await?);
|
||||
|
||||
let mut oids = vec![];
|
||||
while let Some(row) = rows.next().await {
|
||||
let row = row?;
|
||||
let name = row.try_get(stmt.columns(), 0)?;
|
||||
let oid = row.try_get(stmt.columns(), 1)?;
|
||||
oids.push((name, oid));
|
||||
}
|
||||
|
||||
let mut fields = vec![];
|
||||
for row in rows {
|
||||
let name = row.try_get(0)?;
|
||||
let oid = row.try_get(1)?;
|
||||
let type_ = get_type_rec(client, oid).await?;
|
||||
for (name, oid) in oids {
|
||||
let type_ = get_type_rec(client, cache, oid).await?;
|
||||
fields.push(Field::new(name, type_));
|
||||
}
|
||||
|
||||
Ok(fields)
|
||||
}
|
||||
|
||||
async fn typeinfo_composite_statement(client: &Arc<InnerClient>) -> Result<Statement, Error> {
|
||||
if let Some(stmt) = client.typeinfo_composite() {
|
||||
return Ok(stmt);
|
||||
async fn typeinfo_composite_statement<'c>(
|
||||
client: &mut InnerClient,
|
||||
cache: &'c mut CachedTypeInfo,
|
||||
) -> Result<&'c Statement, Error> {
|
||||
if cache.typeinfo_composite().is_some() {
|
||||
// needed to get around a borrow checker limitation
|
||||
return Ok(cache.typeinfo_composite().unwrap());
|
||||
}
|
||||
|
||||
let stmt = prepare_rec(client, TYPEINFO_COMPOSITE_QUERY, &[]).await?;
|
||||
let stmt = prepare_rec(client, cache, TYPEINFO_COMPOSITE_QUERY, &[]).await?;
|
||||
|
||||
client.set_typeinfo_composite(&stmt);
|
||||
Ok(stmt)
|
||||
Ok(cache.set_typeinfo_composite(stmt))
|
||||
}
|
||||
|
||||
@@ -14,7 +14,6 @@ use postgres_types2::{Format, ToSql, Type};
|
||||
use std::fmt;
|
||||
use std::marker::PhantomPinned;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
struct BorrowToSqlParamsDebug<'a>(&'a [&'a (dyn ToSql + Sync)]);
|
||||
@@ -26,10 +25,10 @@ impl fmt::Debug for BorrowToSqlParamsDebug<'_> {
|
||||
}
|
||||
|
||||
pub async fn query<'a, I>(
|
||||
client: &InnerClient,
|
||||
statement: Statement,
|
||||
client: &mut InnerClient,
|
||||
statement: &Statement,
|
||||
params: I,
|
||||
) -> Result<RowStream, Error>
|
||||
) -> Result<RawRowStream, Error>
|
||||
where
|
||||
I: IntoIterator<Item = &'a (dyn ToSql + Sync)>,
|
||||
I::IntoIter: ExactSizeIterator,
|
||||
@@ -41,13 +40,12 @@ where
|
||||
statement.name(),
|
||||
BorrowToSqlParamsDebug(params.as_slice()),
|
||||
);
|
||||
encode(client, &statement, params)?
|
||||
encode(client, statement, params)?
|
||||
} else {
|
||||
encode(client, &statement, params)?
|
||||
encode(client, statement, params)?
|
||||
};
|
||||
let responses = start(client, buf).await?;
|
||||
Ok(RowStream {
|
||||
statement,
|
||||
Ok(RawRowStream {
|
||||
responses,
|
||||
command_tag: None,
|
||||
status: ReadyForQueryStatus::Unknown,
|
||||
@@ -57,7 +55,7 @@ where
|
||||
}
|
||||
|
||||
pub async fn query_txt<S, I>(
|
||||
client: &Arc<InnerClient>,
|
||||
client: &mut InnerClient,
|
||||
query: &str,
|
||||
params: I,
|
||||
) -> Result<RowStream, Error>
|
||||
@@ -157,49 +155,6 @@ where
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn execute<'a, I>(
|
||||
client: &InnerClient,
|
||||
statement: Statement,
|
||||
params: I,
|
||||
) -> Result<u64, Error>
|
||||
where
|
||||
I: IntoIterator<Item = &'a (dyn ToSql + Sync)>,
|
||||
I::IntoIter: ExactSizeIterator,
|
||||
{
|
||||
let buf = if log_enabled!(Level::Debug) {
|
||||
let params = params.into_iter().collect::<Vec<_>>();
|
||||
debug!(
|
||||
"executing statement {} with parameters: {:?}",
|
||||
statement.name(),
|
||||
BorrowToSqlParamsDebug(params.as_slice()),
|
||||
);
|
||||
encode(client, &statement, params)?
|
||||
} else {
|
||||
encode(client, &statement, params)?
|
||||
};
|
||||
let mut responses = start(client, buf).await?;
|
||||
|
||||
let mut rows = 0;
|
||||
loop {
|
||||
match responses.next().await? {
|
||||
Message::DataRow(_) => {}
|
||||
Message::CommandComplete(body) => {
|
||||
rows = body
|
||||
.tag()
|
||||
.map_err(Error::parse)?
|
||||
.rsplit(' ')
|
||||
.next()
|
||||
.unwrap()
|
||||
.parse()
|
||||
.unwrap_or(0);
|
||||
}
|
||||
Message::EmptyQueryResponse => rows = 0,
|
||||
Message::ReadyForQuery(_) => return Ok(rows),
|
||||
_ => return Err(Error::unexpected_message()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn start(client: &InnerClient, buf: Bytes) -> Result<Responses, Error> {
|
||||
let mut responses = client.send(RequestMessages::Single(FrontendMessage::Raw(buf)))?;
|
||||
|
||||
@@ -211,7 +166,11 @@ async fn start(client: &InnerClient, buf: Bytes) -> Result<Responses, Error> {
|
||||
Ok(responses)
|
||||
}
|
||||
|
||||
pub fn encode<'a, I>(client: &InnerClient, statement: &Statement, params: I) -> Result<Bytes, Error>
|
||||
pub fn encode<'a, I>(
|
||||
client: &mut InnerClient,
|
||||
statement: &Statement,
|
||||
params: I,
|
||||
) -> Result<Bytes, Error>
|
||||
where
|
||||
I: IntoIterator<Item = &'a (dyn ToSql + Sync)>,
|
||||
I::IntoIter: ExactSizeIterator,
|
||||
@@ -296,11 +255,7 @@ impl Stream for RowStream {
|
||||
loop {
|
||||
match ready!(this.responses.poll_next(cx)?) {
|
||||
Message::DataRow(body) => {
|
||||
return Poll::Ready(Some(Ok(Row::new(
|
||||
this.statement.clone(),
|
||||
body,
|
||||
*this.output_format,
|
||||
)?)))
|
||||
return Poll::Ready(Some(Ok(Row::new(body, *this.output_format)?)))
|
||||
}
|
||||
Message::EmptyQueryResponse | Message::PortalSuspended => {}
|
||||
Message::CommandComplete(body) => {
|
||||
@@ -338,3 +293,41 @@ impl RowStream {
|
||||
self.status
|
||||
}
|
||||
}
|
||||
|
||||
pin_project! {
|
||||
/// A stream of table rows.
|
||||
pub struct RawRowStream {
|
||||
responses: Responses,
|
||||
command_tag: Option<String>,
|
||||
output_format: Format,
|
||||
status: ReadyForQueryStatus,
|
||||
#[pin]
|
||||
_p: PhantomPinned,
|
||||
}
|
||||
}
|
||||
|
||||
impl Stream for RawRowStream {
|
||||
type Item = Result<Row, Error>;
|
||||
|
||||
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
let this = self.project();
|
||||
loop {
|
||||
match ready!(this.responses.poll_next(cx)?) {
|
||||
Message::DataRow(body) => {
|
||||
return Poll::Ready(Some(Ok(Row::new(body, *this.output_format)?)))
|
||||
}
|
||||
Message::EmptyQueryResponse | Message::PortalSuspended => {}
|
||||
Message::CommandComplete(body) => {
|
||||
if let Ok(tag) = body.tag() {
|
||||
*this.command_tag = Some(tag.to_string());
|
||||
}
|
||||
}
|
||||
Message::ReadyForQuery(status) => {
|
||||
*this.status = status.into();
|
||||
return Poll::Ready(None);
|
||||
}
|
||||
_ => return Poll::Ready(Some(Err(Error::unexpected_message()))),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,103 +1,16 @@
|
||||
//! Rows.
|
||||
|
||||
use crate::row::sealed::{AsName, Sealed};
|
||||
use crate::simple_query::SimpleColumn;
|
||||
use crate::statement::Column;
|
||||
use crate::types::{FromSql, Type, WrongType};
|
||||
use crate::{Error, Statement};
|
||||
use crate::Error;
|
||||
use fallible_iterator::FallibleIterator;
|
||||
use postgres_protocol2::message::backend::DataRowBody;
|
||||
use postgres_types2::{Format, WrongFormat};
|
||||
use std::fmt;
|
||||
use std::ops::Range;
|
||||
use std::str;
|
||||
use std::sync::Arc;
|
||||
|
||||
mod sealed {
|
||||
pub trait Sealed {}
|
||||
|
||||
pub trait AsName {
|
||||
fn as_name(&self) -> &str;
|
||||
}
|
||||
}
|
||||
|
||||
impl AsName for Column {
|
||||
fn as_name(&self) -> &str {
|
||||
self.name()
|
||||
}
|
||||
}
|
||||
|
||||
impl AsName for String {
|
||||
fn as_name(&self) -> &str {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// A trait implemented by types that can index into columns of a row.
|
||||
///
|
||||
/// This cannot be implemented outside of this crate.
|
||||
pub trait RowIndex: Sealed {
|
||||
#[doc(hidden)]
|
||||
fn __idx<T>(&self, columns: &[T]) -> Option<usize>
|
||||
where
|
||||
T: AsName;
|
||||
}
|
||||
|
||||
impl Sealed for usize {}
|
||||
|
||||
impl RowIndex for usize {
|
||||
#[inline]
|
||||
fn __idx<T>(&self, columns: &[T]) -> Option<usize>
|
||||
where
|
||||
T: AsName,
|
||||
{
|
||||
if *self >= columns.len() {
|
||||
None
|
||||
} else {
|
||||
Some(*self)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Sealed for str {}
|
||||
|
||||
impl RowIndex for str {
|
||||
#[inline]
|
||||
fn __idx<T>(&self, columns: &[T]) -> Option<usize>
|
||||
where
|
||||
T: AsName,
|
||||
{
|
||||
if let Some(idx) = columns.iter().position(|d| d.as_name() == self) {
|
||||
return Some(idx);
|
||||
};
|
||||
|
||||
// FIXME ASCII-only case insensitivity isn't really the right thing to
|
||||
// do. Postgres itself uses a dubious wrapper around tolower and JDBC
|
||||
// uses the US locale.
|
||||
columns
|
||||
.iter()
|
||||
.position(|d| d.as_name().eq_ignore_ascii_case(self))
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Sealed for &T where T: ?Sized + Sealed {}
|
||||
|
||||
impl<T> RowIndex for &T
|
||||
where
|
||||
T: ?Sized + RowIndex,
|
||||
{
|
||||
#[inline]
|
||||
fn __idx<U>(&self, columns: &[U]) -> Option<usize>
|
||||
where
|
||||
U: AsName,
|
||||
{
|
||||
T::__idx(*self, columns)
|
||||
}
|
||||
}
|
||||
|
||||
/// A row of data returned from the database by a query.
|
||||
pub struct Row {
|
||||
statement: Statement,
|
||||
output_format: Format,
|
||||
body: DataRowBody,
|
||||
ranges: Vec<Option<Range<usize>>>,
|
||||
@@ -105,80 +18,33 @@ pub struct Row {
|
||||
|
||||
impl fmt::Debug for Row {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("Row")
|
||||
.field("columns", &self.columns())
|
||||
.finish()
|
||||
f.debug_struct("Row").finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl Row {
|
||||
pub(crate) fn new(
|
||||
statement: Statement,
|
||||
// statement: Statement,
|
||||
body: DataRowBody,
|
||||
output_format: Format,
|
||||
) -> Result<Row, Error> {
|
||||
let ranges = body.ranges().collect().map_err(Error::parse)?;
|
||||
Ok(Row {
|
||||
statement,
|
||||
body,
|
||||
ranges,
|
||||
output_format,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns information about the columns of data in the row.
|
||||
pub fn columns(&self) -> &[Column] {
|
||||
self.statement.columns()
|
||||
}
|
||||
|
||||
/// Determines if the row contains no values.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
/// Returns the number of values in the row.
|
||||
pub fn len(&self) -> usize {
|
||||
self.columns().len()
|
||||
}
|
||||
|
||||
/// Deserializes a value from the row.
|
||||
///
|
||||
/// The value can be specified either by its numeric index in the row, or by its column name.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the index is out of bounds or if the value cannot be converted to the specified type.
|
||||
pub fn get<'a, I, T>(&'a self, idx: I) -> T
|
||||
pub(crate) fn try_get<'a, T>(&'a self, columns: &[Column], idx: usize) -> Result<T, Error>
|
||||
where
|
||||
I: RowIndex + fmt::Display,
|
||||
T: FromSql<'a>,
|
||||
{
|
||||
match self.get_inner(&idx) {
|
||||
Ok(ok) => ok,
|
||||
Err(err) => panic!("error retrieving column {}: {}", idx, err),
|
||||
}
|
||||
}
|
||||
|
||||
/// Like `Row::get`, but returns a `Result` rather than panicking.
|
||||
pub fn try_get<'a, I, T>(&'a self, idx: I) -> Result<T, Error>
|
||||
where
|
||||
I: RowIndex + fmt::Display,
|
||||
T: FromSql<'a>,
|
||||
{
|
||||
self.get_inner(&idx)
|
||||
}
|
||||
|
||||
fn get_inner<'a, I, T>(&'a self, idx: &I) -> Result<T, Error>
|
||||
where
|
||||
I: RowIndex + fmt::Display,
|
||||
T: FromSql<'a>,
|
||||
{
|
||||
let idx = match idx.__idx(self.columns()) {
|
||||
Some(idx) => idx,
|
||||
None => return Err(Error::column(idx.to_string())),
|
||||
let Some(column) = columns.get(idx) else {
|
||||
return Err(Error::column(idx.to_string()));
|
||||
};
|
||||
|
||||
let ty = self.columns()[idx].type_();
|
||||
let ty = column.type_();
|
||||
if !T::accepts(ty) {
|
||||
return Err(Error::from_sql(
|
||||
Box::new(WrongType::new::<T>(ty.clone())),
|
||||
@@ -216,85 +82,3 @@ impl Row {
|
||||
self.body.buffer().len()
|
||||
}
|
||||
}
|
||||
|
||||
impl AsName for SimpleColumn {
|
||||
fn as_name(&self) -> &str {
|
||||
self.name()
|
||||
}
|
||||
}
|
||||
|
||||
/// A row of data returned from the database by a simple query.
|
||||
#[derive(Debug)]
|
||||
pub struct SimpleQueryRow {
|
||||
columns: Arc<[SimpleColumn]>,
|
||||
body: DataRowBody,
|
||||
ranges: Vec<Option<Range<usize>>>,
|
||||
}
|
||||
|
||||
impl SimpleQueryRow {
|
||||
#[allow(clippy::new_ret_no_self)]
|
||||
pub(crate) fn new(
|
||||
columns: Arc<[SimpleColumn]>,
|
||||
body: DataRowBody,
|
||||
) -> Result<SimpleQueryRow, Error> {
|
||||
let ranges = body.ranges().collect().map_err(Error::parse)?;
|
||||
Ok(SimpleQueryRow {
|
||||
columns,
|
||||
body,
|
||||
ranges,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns information about the columns of data in the row.
|
||||
pub fn columns(&self) -> &[SimpleColumn] {
|
||||
&self.columns
|
||||
}
|
||||
|
||||
/// Determines if the row contains no values.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
/// Returns the number of values in the row.
|
||||
pub fn len(&self) -> usize {
|
||||
self.columns.len()
|
||||
}
|
||||
|
||||
/// Returns a value from the row.
|
||||
///
|
||||
/// The value can be specified either by its numeric index in the row, or by its column name.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the index is out of bounds or if the value cannot be converted to the specified type.
|
||||
pub fn get<I>(&self, idx: I) -> Option<&str>
|
||||
where
|
||||
I: RowIndex + fmt::Display,
|
||||
{
|
||||
match self.get_inner(&idx) {
|
||||
Ok(ok) => ok,
|
||||
Err(err) => panic!("error retrieving column {}: {}", idx, err),
|
||||
}
|
||||
}
|
||||
|
||||
/// Like `SimpleQueryRow::get`, but returns a `Result` rather than panicking.
|
||||
pub fn try_get<I>(&self, idx: I) -> Result<Option<&str>, Error>
|
||||
where
|
||||
I: RowIndex + fmt::Display,
|
||||
{
|
||||
self.get_inner(&idx)
|
||||
}
|
||||
|
||||
fn get_inner<I>(&self, idx: &I) -> Result<Option<&str>, Error>
|
||||
where
|
||||
I: RowIndex + fmt::Display,
|
||||
{
|
||||
let idx = match idx.__idx(&self.columns) {
|
||||
Some(idx) => idx,
|
||||
None => return Err(Error::column(idx.to_string())),
|
||||
};
|
||||
|
||||
let buf = self.ranges[idx].clone().map(|r| &self.body.buffer()[r]);
|
||||
FromSql::from_sql_nullable(&Type::TEXT, buf).map_err(|e| Error::from_sql(e, idx))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,52 +1,14 @@
|
||||
use crate::client::{InnerClient, Responses};
|
||||
use crate::client::InnerClient;
|
||||
use crate::codec::FrontendMessage;
|
||||
use crate::connection::RequestMessages;
|
||||
use crate::{Error, ReadyForQueryStatus, SimpleQueryMessage, SimpleQueryRow};
|
||||
use crate::{Error, ReadyForQueryStatus};
|
||||
use bytes::Bytes;
|
||||
use fallible_iterator::FallibleIterator;
|
||||
use futures_util::{ready, Stream};
|
||||
use log::debug;
|
||||
use pin_project_lite::pin_project;
|
||||
use postgres_protocol2::message::backend::Message;
|
||||
use postgres_protocol2::message::frontend;
|
||||
use std::marker::PhantomPinned;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
/// Information about a column of a single query row.
|
||||
#[derive(Debug)]
|
||||
pub struct SimpleColumn {
|
||||
name: String,
|
||||
}
|
||||
|
||||
impl SimpleColumn {
|
||||
pub(crate) fn new(name: String) -> SimpleColumn {
|
||||
SimpleColumn { name }
|
||||
}
|
||||
|
||||
/// Returns the name of the column.
|
||||
pub fn name(&self) -> &str {
|
||||
&self.name
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn simple_query(client: &InnerClient, query: &str) -> Result<SimpleQueryStream, Error> {
|
||||
debug!("executing simple query: {}", query);
|
||||
|
||||
let buf = encode(client, query)?;
|
||||
let responses = client.send(RequestMessages::Single(FrontendMessage::Raw(buf)))?;
|
||||
|
||||
Ok(SimpleQueryStream {
|
||||
responses,
|
||||
columns: None,
|
||||
status: ReadyForQueryStatus::Unknown,
|
||||
_p: PhantomPinned,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn batch_execute(
|
||||
client: &InnerClient,
|
||||
client: &mut InnerClient,
|
||||
query: &str,
|
||||
) -> Result<ReadyForQueryStatus, Error> {
|
||||
debug!("executing statement batch: {}", query);
|
||||
@@ -66,77 +28,9 @@ pub async fn batch_execute(
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn encode(client: &InnerClient, query: &str) -> Result<Bytes, Error> {
|
||||
pub(crate) fn encode(client: &mut InnerClient, query: &str) -> Result<Bytes, Error> {
|
||||
client.with_buf(|buf| {
|
||||
frontend::query(query, buf).map_err(Error::encode)?;
|
||||
Ok(buf.split().freeze())
|
||||
})
|
||||
}
|
||||
|
||||
pin_project! {
|
||||
/// A stream of simple query results.
|
||||
pub struct SimpleQueryStream {
|
||||
responses: Responses,
|
||||
columns: Option<Arc<[SimpleColumn]>>,
|
||||
status: ReadyForQueryStatus,
|
||||
#[pin]
|
||||
_p: PhantomPinned,
|
||||
}
|
||||
}
|
||||
|
||||
impl SimpleQueryStream {
|
||||
/// Returns if the connection is ready for querying, with the status of the connection.
|
||||
///
|
||||
/// This might be available only after the stream has been exhausted.
|
||||
pub fn ready_status(&self) -> ReadyForQueryStatus {
|
||||
self.status
|
||||
}
|
||||
}
|
||||
|
||||
impl Stream for SimpleQueryStream {
|
||||
type Item = Result<SimpleQueryMessage, Error>;
|
||||
|
||||
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
let this = self.project();
|
||||
loop {
|
||||
match ready!(this.responses.poll_next(cx)?) {
|
||||
Message::CommandComplete(body) => {
|
||||
let rows = body
|
||||
.tag()
|
||||
.map_err(Error::parse)?
|
||||
.rsplit(' ')
|
||||
.next()
|
||||
.unwrap()
|
||||
.parse()
|
||||
.unwrap_or(0);
|
||||
return Poll::Ready(Some(Ok(SimpleQueryMessage::CommandComplete(rows))));
|
||||
}
|
||||
Message::EmptyQueryResponse => {
|
||||
return Poll::Ready(Some(Ok(SimpleQueryMessage::CommandComplete(0))));
|
||||
}
|
||||
Message::RowDescription(body) => {
|
||||
let columns = body
|
||||
.fields()
|
||||
.map(|f| Ok(SimpleColumn::new(f.name().to_string())))
|
||||
.collect::<Vec<_>>()
|
||||
.map_err(Error::parse)?
|
||||
.into();
|
||||
|
||||
*this.columns = Some(columns);
|
||||
}
|
||||
Message::DataRow(body) => {
|
||||
let row = match &this.columns {
|
||||
Some(columns) => SimpleQueryRow::new(columns.clone(), body)?,
|
||||
None => return Poll::Ready(Some(Err(Error::unexpected_message()))),
|
||||
};
|
||||
return Poll::Ready(Some(Ok(SimpleQueryMessage::Row(row))));
|
||||
}
|
||||
Message::ReadyForQuery(s) => {
|
||||
*this.status = s.into();
|
||||
return Poll::Ready(None);
|
||||
}
|
||||
_ => return Poll::Ready(Some(Err(Error::unexpected_message()))),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,64 +1,33 @@
|
||||
use crate::client::InnerClient;
|
||||
use crate::codec::FrontendMessage;
|
||||
use crate::connection::RequestMessages;
|
||||
use crate::types::Type;
|
||||
use postgres_protocol2::{
|
||||
message::{backend::Field, frontend},
|
||||
Oid,
|
||||
};
|
||||
use std::{
|
||||
fmt,
|
||||
sync::{Arc, Weak},
|
||||
};
|
||||
use postgres_protocol2::{message::backend::Field, Oid};
|
||||
use std::fmt;
|
||||
|
||||
struct StatementInner {
|
||||
client: Weak<InnerClient>,
|
||||
name: String,
|
||||
params: Vec<Type>,
|
||||
columns: Vec<Column>,
|
||||
}
|
||||
|
||||
impl Drop for StatementInner {
|
||||
fn drop(&mut self) {
|
||||
if let Some(client) = self.client.upgrade() {
|
||||
let buf = client.with_buf(|buf| {
|
||||
frontend::close(b'S', &self.name, buf).unwrap();
|
||||
frontend::sync(buf);
|
||||
buf.split().freeze()
|
||||
});
|
||||
let _ = client.send(RequestMessages::Single(FrontendMessage::Raw(buf)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A prepared statement.
|
||||
///
|
||||
/// Prepared statements can only be used with the connection that created them.
|
||||
#[derive(Clone)]
|
||||
pub struct Statement(Arc<StatementInner>);
|
||||
pub struct Statement(StatementInner);
|
||||
|
||||
impl Statement {
|
||||
pub(crate) fn new(
|
||||
inner: &Arc<InnerClient>,
|
||||
name: String,
|
||||
params: Vec<Type>,
|
||||
columns: Vec<Column>,
|
||||
) -> Statement {
|
||||
Statement(Arc::new(StatementInner {
|
||||
client: Arc::downgrade(inner),
|
||||
pub(crate) fn new(name: String, params: Vec<Type>, columns: Vec<Column>) -> Statement {
|
||||
Statement(StatementInner {
|
||||
name,
|
||||
params,
|
||||
columns,
|
||||
}))
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn new_anonymous(params: Vec<Type>, columns: Vec<Column>) -> Statement {
|
||||
Statement(Arc::new(StatementInner {
|
||||
client: Weak::new(),
|
||||
Statement(StatementInner {
|
||||
name: String::new(),
|
||||
params,
|
||||
columns,
|
||||
}))
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn name(&self) -> &str {
|
||||
|
||||
@@ -1,57 +0,0 @@
|
||||
use crate::to_statement::private::{Sealed, ToStatementType};
|
||||
use crate::Statement;
|
||||
|
||||
mod private {
|
||||
use crate::{Client, Error, Statement};
|
||||
|
||||
pub trait Sealed {}
|
||||
|
||||
pub enum ToStatementType<'a> {
|
||||
Statement(&'a Statement),
|
||||
Query(&'a str),
|
||||
}
|
||||
|
||||
impl<'a> ToStatementType<'a> {
|
||||
pub async fn into_statement(self, client: &Client) -> Result<Statement, Error> {
|
||||
match self {
|
||||
ToStatementType::Statement(s) => Ok(s.clone()),
|
||||
ToStatementType::Query(s) => client.prepare(s).await,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A trait abstracting over prepared and unprepared statements.
|
||||
///
|
||||
/// Many methods are generic over this bound, so that they support both a raw query string as well as a statement which
|
||||
/// was prepared previously.
|
||||
///
|
||||
/// This trait is "sealed" and cannot be implemented by anything outside this crate.
|
||||
pub trait ToStatement: Sealed {
|
||||
#[doc(hidden)]
|
||||
fn __convert(&self) -> ToStatementType<'_>;
|
||||
}
|
||||
|
||||
impl ToStatement for Statement {
|
||||
fn __convert(&self) -> ToStatementType<'_> {
|
||||
ToStatementType::Statement(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl Sealed for Statement {}
|
||||
|
||||
impl ToStatement for str {
|
||||
fn __convert(&self) -> ToStatementType<'_> {
|
||||
ToStatementType::Query(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl Sealed for str {}
|
||||
|
||||
impl ToStatement for String {
|
||||
fn __convert(&self) -> ToStatementType<'_> {
|
||||
ToStatementType::Query(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl Sealed for String {}
|
||||
@@ -1,6 +1,5 @@
|
||||
use crate::codec::FrontendMessage;
|
||||
use crate::connection::RequestMessages;
|
||||
use crate::query::RowStream;
|
||||
use crate::{CancelToken, Client, Error, ReadyForQueryStatus};
|
||||
use postgres_protocol2::message::frontend;
|
||||
|
||||
@@ -19,13 +18,13 @@ impl Drop for Transaction<'_> {
|
||||
return;
|
||||
}
|
||||
|
||||
let buf = self.client.inner().with_buf(|buf| {
|
||||
let buf = self.client.inner.with_buf(|buf| {
|
||||
frontend::query("ROLLBACK", buf).unwrap();
|
||||
buf.split().freeze()
|
||||
});
|
||||
let _ = self
|
||||
.client
|
||||
.inner()
|
||||
.inner
|
||||
.send(RequestMessages::Single(FrontendMessage::Raw(buf)));
|
||||
}
|
||||
}
|
||||
@@ -52,23 +51,13 @@ impl<'a> Transaction<'a> {
|
||||
self.client.batch_execute("ROLLBACK").await
|
||||
}
|
||||
|
||||
/// Like `Client::query_raw_txt`.
|
||||
pub async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
|
||||
where
|
||||
S: AsRef<str>,
|
||||
I: IntoIterator<Item = Option<S>>,
|
||||
I::IntoIter: ExactSizeIterator,
|
||||
{
|
||||
self.client.query_raw_txt(statement, params).await
|
||||
}
|
||||
|
||||
/// Like `Client::cancel_token`.
|
||||
pub fn cancel_token(&self) -> CancelToken {
|
||||
self.client.cancel_token()
|
||||
}
|
||||
|
||||
/// Returns a reference to the underlying `Client`.
|
||||
pub fn client(&self) -> &Client {
|
||||
pub fn client(&mut self) -> &mut Client {
|
||||
self.client
|
||||
}
|
||||
}
|
||||
|
||||
@@ -164,6 +164,12 @@ impl TenantShardId {
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for ShardNumber {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
self.0.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for ShardSlug<'_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
|
||||
@@ -87,7 +87,7 @@ use crate::tenant::timeline::offload::offload_timeline;
|
||||
use crate::tenant::timeline::offload::OffloadError;
|
||||
use crate::tenant::timeline::CompactFlags;
|
||||
use crate::tenant::timeline::CompactOptions;
|
||||
use crate::tenant::timeline::CompactRange;
|
||||
use crate::tenant::timeline::CompactRequest;
|
||||
use crate::tenant::timeline::CompactionError;
|
||||
use crate::tenant::timeline::Timeline;
|
||||
use crate::tenant::GetTimelineError;
|
||||
@@ -1978,6 +1978,26 @@ async fn timeline_gc_handler(
|
||||
json_response(StatusCode::OK, gc_result)
|
||||
}
|
||||
|
||||
// Cancel scheduled compaction tasks
|
||||
async fn timeline_cancel_compact_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
let state = get_state(&request);
|
||||
async {
|
||||
let tenant = state
|
||||
.tenant_manager
|
||||
.get_attached_tenant_shard(tenant_shard_id)?;
|
||||
tenant.cancel_scheduled_compaction(timeline_id);
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
.instrument(info_span!("timeline_cancel_compact", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
|
||||
.await
|
||||
}
|
||||
|
||||
// Run compaction immediately on given timeline.
|
||||
async fn timeline_compact_handler(
|
||||
mut request: Request<Body>,
|
||||
@@ -1987,7 +2007,7 @@ async fn timeline_compact_handler(
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
let compact_range = json_request_maybe::<Option<CompactRange>>(&mut request).await?;
|
||||
let compact_request = json_request_maybe::<Option<CompactRequest>>(&mut request).await?;
|
||||
|
||||
let state = get_state(&request);
|
||||
|
||||
@@ -2012,22 +2032,42 @@ async fn timeline_compact_handler(
|
||||
let wait_until_uploaded =
|
||||
parse_query_param::<_, bool>(&request, "wait_until_uploaded")?.unwrap_or(false);
|
||||
|
||||
let wait_until_scheduled_compaction_done =
|
||||
parse_query_param::<_, bool>(&request, "wait_until_scheduled_compaction_done")?
|
||||
.unwrap_or(false);
|
||||
|
||||
let options = CompactOptions {
|
||||
compact_range,
|
||||
compact_range: compact_request
|
||||
.as_ref()
|
||||
.and_then(|r| r.compact_range.clone()),
|
||||
compact_below_lsn: compact_request.as_ref().and_then(|r| r.compact_below_lsn),
|
||||
flags,
|
||||
};
|
||||
|
||||
let scheduled = compact_request.map(|r| r.scheduled).unwrap_or(false);
|
||||
|
||||
async {
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?;
|
||||
timeline
|
||||
.compact_with_options(&cancel, options, &ctx)
|
||||
.await
|
||||
.map_err(|e| ApiError::InternalServerError(e.into()))?;
|
||||
if wait_until_uploaded {
|
||||
timeline.remote_client.wait_completion().await
|
||||
// XXX map to correct ApiError for the cases where it's due to shutdown
|
||||
.context("wait completion").map_err(ApiError::InternalServerError)?;
|
||||
if scheduled {
|
||||
let tenant = state
|
||||
.tenant_manager
|
||||
.get_attached_tenant_shard(tenant_shard_id)?;
|
||||
let rx = tenant.schedule_compaction(timeline_id, options).await;
|
||||
if wait_until_scheduled_compaction_done {
|
||||
// It is possible that this will take a long time, dropping the HTTP request will not cancel the compaction.
|
||||
rx.await.ok();
|
||||
}
|
||||
} else {
|
||||
timeline
|
||||
.compact_with_options(&cancel, options, &ctx)
|
||||
.await
|
||||
.map_err(|e| ApiError::InternalServerError(e.into()))?;
|
||||
if wait_until_uploaded {
|
||||
timeline.remote_client.wait_completion().await
|
||||
// XXX map to correct ApiError for the cases where it's due to shutdown
|
||||
.context("wait completion").map_err(ApiError::InternalServerError)?;
|
||||
}
|
||||
}
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
@@ -2108,16 +2148,20 @@ async fn timeline_checkpoint_handler(
|
||||
// By default, checkpoints come with a compaction, but this may be optionally disabled by tests that just want to flush + upload.
|
||||
let compact = parse_query_param::<_, bool>(&request, "compact")?.unwrap_or(true);
|
||||
|
||||
let wait_until_flushed: bool =
|
||||
parse_query_param(&request, "wait_until_flushed")?.unwrap_or(true);
|
||||
|
||||
let wait_until_uploaded =
|
||||
parse_query_param::<_, bool>(&request, "wait_until_uploaded")?.unwrap_or(false);
|
||||
|
||||
async {
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?;
|
||||
timeline
|
||||
.freeze_and_flush()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
if wait_until_flushed {
|
||||
timeline.freeze_and_flush().await
|
||||
} else {
|
||||
timeline.freeze().await.and(Ok(()))
|
||||
}.map_err(|e| {
|
||||
match e {
|
||||
tenant::timeline::FlushLayerError::Cancelled => ApiError::ShuttingDown,
|
||||
other => ApiError::InternalServerError(other.into()),
|
||||
@@ -3301,6 +3345,10 @@ pub fn make_router(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact",
|
||||
|r| api_handler(r, timeline_compact_handler),
|
||||
)
|
||||
.delete(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact",
|
||||
|r| api_handler(r, timeline_cancel_compact_handler),
|
||||
)
|
||||
.put(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/offload",
|
||||
|r| testing_api_handler("attempt timeline offload", r, timeline_offload_handler),
|
||||
|
||||
@@ -464,6 +464,24 @@ static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static DISK_CONSISTENT_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
|
||||
register_int_gauge_vec!(
|
||||
"pageserver_disk_consistent_lsn",
|
||||
"Disk consistent LSN grouped by timeline",
|
||||
&["tenant_id", "shard_id", "timeline_id"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub(crate) static PROJECTED_REMOTE_CONSISTENT_LSN: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||
register_uint_gauge_vec!(
|
||||
"pageserver_projected_remote_consistent_lsn",
|
||||
"Projected remote consistent LSN grouped by timeline",
|
||||
&["tenant_id", "shard_id", "timeline_id"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static PITR_HISTORY_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||
register_uint_gauge_vec!(
|
||||
"pageserver_pitr_history_size",
|
||||
@@ -2394,7 +2412,8 @@ pub(crate) struct TimelineMetrics {
|
||||
pub load_layer_map_histo: StorageTimeMetrics,
|
||||
pub garbage_collect_histo: StorageTimeMetrics,
|
||||
pub find_gc_cutoffs_histo: StorageTimeMetrics,
|
||||
pub last_record_gauge: IntGauge,
|
||||
pub last_record_lsn_gauge: IntGauge,
|
||||
pub disk_consistent_lsn_gauge: IntGauge,
|
||||
pub pitr_history_size: UIntGauge,
|
||||
pub archival_size: UIntGauge,
|
||||
pub(crate) layer_size_image: UIntGauge,
|
||||
@@ -2475,7 +2494,11 @@ impl TimelineMetrics {
|
||||
&shard_id,
|
||||
&timeline_id,
|
||||
);
|
||||
let last_record_gauge = LAST_RECORD_LSN
|
||||
let last_record_lsn_gauge = LAST_RECORD_LSN
|
||||
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
|
||||
.unwrap();
|
||||
|
||||
let disk_consistent_lsn_gauge = DISK_CONSISTENT_LSN
|
||||
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
|
||||
.unwrap();
|
||||
|
||||
@@ -2578,7 +2601,8 @@ impl TimelineMetrics {
|
||||
garbage_collect_histo,
|
||||
find_gc_cutoffs_histo,
|
||||
load_layer_map_histo,
|
||||
last_record_gauge,
|
||||
last_record_lsn_gauge,
|
||||
disk_consistent_lsn_gauge,
|
||||
pitr_history_size,
|
||||
archival_size,
|
||||
layer_size_image,
|
||||
@@ -2642,6 +2666,7 @@ impl TimelineMetrics {
|
||||
let timeline_id = &self.timeline_id;
|
||||
let shard_id = &self.shard_id;
|
||||
let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
let _ = DISK_CONSISTENT_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
let _ = FLUSH_WAIT_UPLOAD_TIME.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
{
|
||||
@@ -2805,6 +2830,7 @@ pub(crate) struct RemoteTimelineClientMetrics {
|
||||
calls: Mutex<HashMap<(&'static str, &'static str), IntCounterPair>>,
|
||||
bytes_started_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
|
||||
bytes_finished_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
|
||||
pub(crate) projected_remote_consistent_lsn_gauge: UIntGauge,
|
||||
}
|
||||
|
||||
impl RemoteTimelineClientMetrics {
|
||||
@@ -2819,6 +2845,10 @@ impl RemoteTimelineClientMetrics {
|
||||
.unwrap(),
|
||||
);
|
||||
|
||||
let projected_remote_consistent_lsn_gauge = PROJECTED_REMOTE_CONSISTENT_LSN
|
||||
.get_metric_with_label_values(&[&tenant_id_str, &shard_id_str, &timeline_id_str])
|
||||
.unwrap();
|
||||
|
||||
RemoteTimelineClientMetrics {
|
||||
tenant_id: tenant_id_str,
|
||||
shard_id: shard_id_str,
|
||||
@@ -2827,6 +2857,7 @@ impl RemoteTimelineClientMetrics {
|
||||
bytes_started_counter: Mutex::new(HashMap::default()),
|
||||
bytes_finished_counter: Mutex::new(HashMap::default()),
|
||||
remote_physical_size_gauge,
|
||||
projected_remote_consistent_lsn_gauge,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3040,6 +3071,7 @@ impl Drop for RemoteTimelineClientMetrics {
|
||||
calls,
|
||||
bytes_started_counter,
|
||||
bytes_finished_counter,
|
||||
projected_remote_consistent_lsn_gauge,
|
||||
} = self;
|
||||
for ((a, b), _) in calls.get_mut().unwrap().drain() {
|
||||
let mut res = [Ok(()), Ok(())];
|
||||
@@ -3069,6 +3101,14 @@ impl Drop for RemoteTimelineClientMetrics {
|
||||
let _ = remote_physical_size_gauge; // use to avoid 'unused' warning in desctructuring above
|
||||
let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
}
|
||||
{
|
||||
let _ = projected_remote_consistent_lsn_gauge;
|
||||
let _ = PROJECTED_REMOTE_CONSISTENT_LSN.remove_label_values(&[
|
||||
tenant_id,
|
||||
shard_id,
|
||||
timeline_id,
|
||||
]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -37,14 +37,18 @@ use remote_timeline_client::manifest::{
|
||||
};
|
||||
use remote_timeline_client::UploadQueueNotReadyError;
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::VecDeque;
|
||||
use std::fmt;
|
||||
use std::future::Future;
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::Weak;
|
||||
use std::time::SystemTime;
|
||||
use storage_broker::BrokerClientChannel;
|
||||
use timeline::compaction::ScheduledCompactionTask;
|
||||
use timeline::import_pgdata;
|
||||
use timeline::offload::offload_timeline;
|
||||
use timeline::CompactFlags;
|
||||
use timeline::CompactOptions;
|
||||
use timeline::ShutdownMode;
|
||||
use tokio::io::BufReader;
|
||||
use tokio::sync::watch;
|
||||
@@ -339,6 +343,11 @@ pub struct Tenant {
|
||||
/// Overhead of mutex is acceptable because compaction is done with a multi-second period.
|
||||
compaction_circuit_breaker: std::sync::Mutex<CircuitBreaker>,
|
||||
|
||||
/// Scheduled compaction tasks. Currently, this can only be populated by triggering
|
||||
/// a manual gc-compaction from the manual compaction API.
|
||||
scheduled_compaction_tasks:
|
||||
std::sync::Mutex<HashMap<TimelineId, VecDeque<ScheduledCompactionTask>>>,
|
||||
|
||||
/// If the tenant is in Activating state, notify this to encourage it
|
||||
/// to proceed to Active as soon as possible, rather than waiting for lazy
|
||||
/// background warmup.
|
||||
@@ -2953,27 +2962,68 @@ impl Tenant {
|
||||
|
||||
for (timeline_id, timeline, (can_compact, can_offload)) in &timelines_to_compact_or_offload
|
||||
{
|
||||
// pending_task_left == None: cannot compact, maybe still pending tasks
|
||||
// pending_task_left == Some(true): compaction task left
|
||||
// pending_task_left == Some(false): no compaction task left
|
||||
let pending_task_left = if *can_compact {
|
||||
Some(
|
||||
timeline
|
||||
.compact(cancel, EnumSet::empty(), ctx)
|
||||
.instrument(info_span!("compact_timeline", %timeline_id))
|
||||
.await
|
||||
.inspect_err(|e| match e {
|
||||
timeline::CompactionError::ShuttingDown => (),
|
||||
timeline::CompactionError::Offload(_) => {
|
||||
// Failures to offload timelines do not trip the circuit breaker, because
|
||||
// they do not do lots of writes the way compaction itself does: it is cheap
|
||||
// to retry, and it would be bad to stop all compaction because of an issue with offloading.
|
||||
let has_pending_l0_compaction_task = timeline
|
||||
.compact(cancel, EnumSet::empty(), ctx)
|
||||
.instrument(info_span!("compact_timeline", %timeline_id))
|
||||
.await
|
||||
.inspect_err(|e| match e {
|
||||
timeline::CompactionError::ShuttingDown => (),
|
||||
timeline::CompactionError::Offload(_) => {
|
||||
// Failures to offload timelines do not trip the circuit breaker, because
|
||||
// they do not do lots of writes the way compaction itself does: it is cheap
|
||||
// to retry, and it would be bad to stop all compaction because of an issue with offloading.
|
||||
}
|
||||
timeline::CompactionError::Other(e) => {
|
||||
self.compaction_circuit_breaker
|
||||
.lock()
|
||||
.unwrap()
|
||||
.fail(&CIRCUIT_BREAKERS_BROKEN, e);
|
||||
}
|
||||
})?;
|
||||
if has_pending_l0_compaction_task {
|
||||
Some(true)
|
||||
} else {
|
||||
let has_pending_scheduled_compaction_task;
|
||||
let next_scheduled_compaction_task = {
|
||||
let mut guard = self.scheduled_compaction_tasks.lock().unwrap();
|
||||
if let Some(tline_pending_tasks) = guard.get_mut(timeline_id) {
|
||||
let next_task = tline_pending_tasks.pop_front();
|
||||
has_pending_scheduled_compaction_task = !tline_pending_tasks.is_empty();
|
||||
next_task
|
||||
} else {
|
||||
has_pending_scheduled_compaction_task = false;
|
||||
None
|
||||
}
|
||||
};
|
||||
if let Some(mut next_scheduled_compaction_task) = next_scheduled_compaction_task
|
||||
{
|
||||
if !next_scheduled_compaction_task
|
||||
.options
|
||||
.flags
|
||||
.contains(CompactFlags::EnhancedGcBottomMostCompaction)
|
||||
{
|
||||
warn!("ignoring scheduled compaction task: scheduled task must be gc compaction: {:?}", next_scheduled_compaction_task.options);
|
||||
} else {
|
||||
let _ = timeline
|
||||
.compact_with_options(
|
||||
cancel,
|
||||
next_scheduled_compaction_task.options,
|
||||
ctx,
|
||||
)
|
||||
.instrument(info_span!("scheduled_compact_timeline", %timeline_id))
|
||||
.await?;
|
||||
if let Some(tx) = next_scheduled_compaction_task.result_tx.take() {
|
||||
// TODO: we can send compaction statistics in the future
|
||||
tx.send(()).ok();
|
||||
}
|
||||
timeline::CompactionError::Other(e) => {
|
||||
self.compaction_circuit_breaker
|
||||
.lock()
|
||||
.unwrap()
|
||||
.fail(&CIRCUIT_BREAKERS_BROKEN, e);
|
||||
}
|
||||
})?,
|
||||
)
|
||||
}
|
||||
}
|
||||
Some(has_pending_scheduled_compaction_task)
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
@@ -2993,6 +3043,36 @@ impl Tenant {
|
||||
Ok(has_pending_task)
|
||||
}
|
||||
|
||||
/// Cancel scheduled compaction tasks
|
||||
pub(crate) fn cancel_scheduled_compaction(
|
||||
&self,
|
||||
timeline_id: TimelineId,
|
||||
) -> Vec<ScheduledCompactionTask> {
|
||||
let mut guard = self.scheduled_compaction_tasks.lock().unwrap();
|
||||
if let Some(tline_pending_tasks) = guard.get_mut(&timeline_id) {
|
||||
let current_tline_pending_tasks = std::mem::take(tline_pending_tasks);
|
||||
current_tline_pending_tasks.into_iter().collect()
|
||||
} else {
|
||||
Vec::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Schedule a compaction task for a timeline.
|
||||
pub(crate) async fn schedule_compaction(
|
||||
&self,
|
||||
timeline_id: TimelineId,
|
||||
options: CompactOptions,
|
||||
) -> tokio::sync::oneshot::Receiver<()> {
|
||||
let (tx, rx) = tokio::sync::oneshot::channel();
|
||||
let mut guard = self.scheduled_compaction_tasks.lock().unwrap();
|
||||
let tline_pending_tasks = guard.entry(timeline_id).or_default();
|
||||
tline_pending_tasks.push_back(ScheduledCompactionTask {
|
||||
options,
|
||||
result_tx: Some(tx),
|
||||
});
|
||||
rx
|
||||
}
|
||||
|
||||
// Call through to all timelines to freeze ephemeral layers if needed. Usually
|
||||
// this happens during ingest: this background housekeeping is for freezing layers
|
||||
// that are open but haven't been written to for some time.
|
||||
@@ -4005,6 +4085,7 @@ impl Tenant {
|
||||
// use an extremely long backoff.
|
||||
Some(Duration::from_secs(3600 * 24)),
|
||||
)),
|
||||
scheduled_compaction_tasks: Mutex::new(Default::default()),
|
||||
activate_now_sem: tokio::sync::Semaphore::new(0),
|
||||
attach_wal_lag_cooldown: Arc::new(std::sync::OnceLock::new()),
|
||||
cancel: CancellationToken::default(),
|
||||
@@ -9163,6 +9244,7 @@ mod tests {
|
||||
CompactOptions {
|
||||
flags: dryrun_flags,
|
||||
compact_range: None,
|
||||
compact_below_lsn: None,
|
||||
},
|
||||
&ctx,
|
||||
)
|
||||
@@ -9399,6 +9481,7 @@ mod tests {
|
||||
CompactOptions {
|
||||
flags: dryrun_flags,
|
||||
compact_range: None,
|
||||
compact_below_lsn: None,
|
||||
},
|
||||
&ctx,
|
||||
)
|
||||
@@ -9885,7 +9968,15 @@ mod tests {
|
||||
|
||||
// Do a partial compaction on key range 0..2
|
||||
tline
|
||||
.partial_compact_with_gc(get_key(0)..get_key(2), &cancel, EnumSet::new(), &ctx)
|
||||
.compact_with_gc(
|
||||
&cancel,
|
||||
CompactOptions {
|
||||
flags: EnumSet::new(),
|
||||
compact_range: Some((get_key(0)..get_key(2)).into()),
|
||||
compact_below_lsn: None,
|
||||
},
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;
|
||||
@@ -9924,7 +10015,15 @@ mod tests {
|
||||
|
||||
// Do a partial compaction on key range 2..4
|
||||
tline
|
||||
.partial_compact_with_gc(get_key(2)..get_key(4), &cancel, EnumSet::new(), &ctx)
|
||||
.compact_with_gc(
|
||||
&cancel,
|
||||
CompactOptions {
|
||||
flags: EnumSet::new(),
|
||||
compact_range: Some((get_key(2)..get_key(4)).into()),
|
||||
compact_below_lsn: None,
|
||||
},
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;
|
||||
@@ -9968,7 +10067,15 @@ mod tests {
|
||||
|
||||
// Do a partial compaction on key range 4..9
|
||||
tline
|
||||
.partial_compact_with_gc(get_key(4)..get_key(9), &cancel, EnumSet::new(), &ctx)
|
||||
.compact_with_gc(
|
||||
&cancel,
|
||||
CompactOptions {
|
||||
flags: EnumSet::new(),
|
||||
compact_range: Some((get_key(4)..get_key(9)).into()),
|
||||
compact_below_lsn: None,
|
||||
},
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;
|
||||
@@ -10011,7 +10118,15 @@ mod tests {
|
||||
|
||||
// Do a partial compaction on key range 9..10
|
||||
tline
|
||||
.partial_compact_with_gc(get_key(9)..get_key(10), &cancel, EnumSet::new(), &ctx)
|
||||
.compact_with_gc(
|
||||
&cancel,
|
||||
CompactOptions {
|
||||
flags: EnumSet::new(),
|
||||
compact_range: Some((get_key(9)..get_key(10)).into()),
|
||||
compact_below_lsn: None,
|
||||
},
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;
|
||||
@@ -10059,7 +10174,15 @@ mod tests {
|
||||
|
||||
// Do a partial compaction on key range 0..10, all image layers below LSN 20 can be replaced with new ones.
|
||||
tline
|
||||
.partial_compact_with_gc(get_key(0)..get_key(10), &cancel, EnumSet::new(), &ctx)
|
||||
.compact_with_gc(
|
||||
&cancel,
|
||||
CompactOptions {
|
||||
flags: EnumSet::new(),
|
||||
compact_range: Some((get_key(0)..get_key(10)).into()),
|
||||
compact_below_lsn: None,
|
||||
},
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await;
|
||||
|
||||
@@ -2192,6 +2192,9 @@ impl RemoteTimelineClient {
|
||||
upload_queue.clean.1 = Some(task.task_id);
|
||||
|
||||
let lsn = upload_queue.clean.0.metadata.disk_consistent_lsn();
|
||||
self.metrics
|
||||
.projected_remote_consistent_lsn_gauge
|
||||
.set(lsn.0);
|
||||
|
||||
if self.generation.is_none() {
|
||||
// Legacy mode: skip validating generation
|
||||
|
||||
@@ -53,7 +53,7 @@ use utils::{
|
||||
postgres_client::PostgresClientProtocol,
|
||||
sync::gate::{Gate, GateGuard},
|
||||
};
|
||||
use wal_decoder::serialized_batch::SerializedValueBatch;
|
||||
use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta};
|
||||
|
||||
use std::sync::atomic::Ordering as AtomicOrdering;
|
||||
use std::sync::{Arc, Mutex, RwLock, Weak};
|
||||
@@ -768,7 +768,7 @@ pub enum GetLogicalSizePriority {
|
||||
Background,
|
||||
}
|
||||
|
||||
#[derive(enumset::EnumSetType)]
|
||||
#[derive(Debug, enumset::EnumSetType)]
|
||||
pub(crate) enum CompactFlags {
|
||||
ForceRepartition,
|
||||
ForceImageLayerCreation,
|
||||
@@ -777,6 +777,16 @@ pub(crate) enum CompactFlags {
|
||||
DryRun,
|
||||
}
|
||||
|
||||
#[serde_with::serde_as]
|
||||
#[derive(Debug, Clone, serde::Deserialize)]
|
||||
pub(crate) struct CompactRequest {
|
||||
pub compact_range: Option<CompactRange>,
|
||||
pub compact_below_lsn: Option<Lsn>,
|
||||
/// Whether the compaction job should be scheduled.
|
||||
#[serde(default)]
|
||||
pub scheduled: bool,
|
||||
}
|
||||
|
||||
#[serde_with::serde_as]
|
||||
#[derive(Debug, Clone, serde::Deserialize)]
|
||||
pub(crate) struct CompactRange {
|
||||
@@ -786,10 +796,24 @@ pub(crate) struct CompactRange {
|
||||
pub end: Key,
|
||||
}
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
impl From<Range<Key>> for CompactRange {
|
||||
fn from(range: Range<Key>) -> Self {
|
||||
CompactRange {
|
||||
start: range.start,
|
||||
end: range.end,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub(crate) struct CompactOptions {
|
||||
pub flags: EnumSet<CompactFlags>,
|
||||
/// If set, the compaction will only compact the key range specified by this option.
|
||||
/// This option is only used by GC compaction.
|
||||
pub compact_range: Option<CompactRange>,
|
||||
/// If set, the compaction will only compact the LSN below this value.
|
||||
/// This option is only used by GC compaction.
|
||||
pub compact_below_lsn: Option<Lsn>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for Timeline {
|
||||
@@ -1433,23 +1457,31 @@ impl Timeline {
|
||||
Ok(lease)
|
||||
}
|
||||
|
||||
/// Flush to disk all data that was written with the put_* functions
|
||||
/// Freeze the current open in-memory layer. It will be written to disk on next iteration.
|
||||
/// Returns the flush request ID which can be awaited with wait_flush_completion().
|
||||
#[instrument(skip(self), fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%self.timeline_id))]
|
||||
pub(crate) async fn freeze(&self) -> Result<u64, FlushLayerError> {
|
||||
self.freeze0().await
|
||||
}
|
||||
|
||||
/// Freeze and flush the open in-memory layer, waiting for it to be written to disk.
|
||||
#[instrument(skip(self), fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%self.timeline_id))]
|
||||
pub(crate) async fn freeze_and_flush(&self) -> Result<(), FlushLayerError> {
|
||||
self.freeze_and_flush0().await
|
||||
}
|
||||
|
||||
/// Freeze the current open in-memory layer. It will be written to disk on next iteration.
|
||||
/// Returns the flush request ID which can be awaited with wait_flush_completion().
|
||||
pub(crate) async fn freeze0(&self) -> Result<u64, FlushLayerError> {
|
||||
let mut g = self.write_lock.lock().await;
|
||||
let to_lsn = self.get_last_record_lsn();
|
||||
self.freeze_inmem_layer_at(to_lsn, &mut g).await
|
||||
}
|
||||
|
||||
// This exists to provide a non-span creating version of `freeze_and_flush` we can call without
|
||||
// polluting the span hierarchy.
|
||||
pub(crate) async fn freeze_and_flush0(&self) -> Result<(), FlushLayerError> {
|
||||
let token = {
|
||||
// Freeze the current open in-memory layer. It will be written to disk on next
|
||||
// iteration.
|
||||
let mut g = self.write_lock.lock().await;
|
||||
|
||||
let to_lsn = self.get_last_record_lsn();
|
||||
self.freeze_inmem_layer_at(to_lsn, &mut g).await?
|
||||
};
|
||||
let token = self.freeze0().await?;
|
||||
self.wait_flush_completion(token).await
|
||||
}
|
||||
|
||||
@@ -1604,6 +1636,7 @@ impl Timeline {
|
||||
CompactOptions {
|
||||
flags,
|
||||
compact_range: None,
|
||||
compact_below_lsn: None,
|
||||
},
|
||||
ctx,
|
||||
)
|
||||
@@ -2359,7 +2392,7 @@ impl Timeline {
|
||||
|
||||
result
|
||||
.metrics
|
||||
.last_record_gauge
|
||||
.last_record_lsn_gauge
|
||||
.set(disk_consistent_lsn.0 as i64);
|
||||
result
|
||||
})
|
||||
@@ -3481,7 +3514,7 @@ impl Timeline {
|
||||
pub(crate) fn finish_write(&self, new_lsn: Lsn) {
|
||||
assert!(new_lsn.is_aligned());
|
||||
|
||||
self.metrics.last_record_gauge.set(new_lsn.0 as i64);
|
||||
self.metrics.last_record_lsn_gauge.set(new_lsn.0 as i64);
|
||||
self.last_record_lsn.advance(new_lsn);
|
||||
}
|
||||
|
||||
@@ -3849,6 +3882,10 @@ impl Timeline {
|
||||
fn set_disk_consistent_lsn(&self, new_value: Lsn) -> bool {
|
||||
let old_value = self.disk_consistent_lsn.fetch_max(new_value);
|
||||
assert!(new_value >= old_value, "disk_consistent_lsn must be growing monotonously at runtime; current {old_value}, offered {new_value}");
|
||||
|
||||
self.metrics
|
||||
.disk_consistent_lsn_gauge
|
||||
.set(new_value.0 as i64);
|
||||
new_value != old_value
|
||||
}
|
||||
|
||||
@@ -5887,6 +5924,23 @@ impl<'a> TimelineWriter<'a> {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// In debug builds, assert that we don't write any keys that don't belong to this shard.
|
||||
// We don't assert this in release builds, since key ownership policies may change over
|
||||
// time. Stray keys will be removed during compaction.
|
||||
if cfg!(debug_assertions) {
|
||||
for metadata in &batch.metadata {
|
||||
if let ValueMeta::Serialized(metadata) = metadata {
|
||||
let key = Key::from_compact(metadata.key);
|
||||
assert!(
|
||||
self.shard_identity.is_key_local(&key)
|
||||
|| self.shard_identity.is_key_global(&key),
|
||||
"key {key} does not belong on shard {}",
|
||||
self.shard_identity.shard_index()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let batch_max_lsn = batch.max_lsn;
|
||||
let buf_size: u64 = batch.buffer_size() as u64;
|
||||
|
||||
|
||||
@@ -16,7 +16,6 @@ use super::{
|
||||
|
||||
use anyhow::{anyhow, bail, Context};
|
||||
use bytes::Bytes;
|
||||
use enumset::EnumSet;
|
||||
use fail::fail_point;
|
||||
use itertools::Itertools;
|
||||
use pageserver_api::key::KEY_SIZE;
|
||||
@@ -64,6 +63,12 @@ use super::CompactionError;
|
||||
/// Maximum number of deltas before generating an image layer in bottom-most compaction.
|
||||
const COMPACTION_DELTA_THRESHOLD: usize = 5;
|
||||
|
||||
/// A scheduled compaction task.
|
||||
pub struct ScheduledCompactionTask {
|
||||
pub options: CompactOptions,
|
||||
pub result_tx: Option<tokio::sync::oneshot::Sender<()>>,
|
||||
}
|
||||
|
||||
pub struct GcCompactionJobDescription {
|
||||
/// All layers to read in the compaction job
|
||||
selected_layers: Vec<Layer>,
|
||||
@@ -1174,11 +1179,12 @@ impl Timeline {
|
||||
.await
|
||||
.map_err(CompactionError::Other)?;
|
||||
} else {
|
||||
debug!(
|
||||
"Dropping key {} during compaction (it belongs on shard {:?})",
|
||||
key,
|
||||
self.shard_identity.get_shard_number(&key)
|
||||
);
|
||||
let shard = self.shard_identity.shard_index();
|
||||
let owner = self.shard_identity.get_shard_number(&key);
|
||||
if cfg!(debug_assertions) {
|
||||
panic!("key {key} does not belong on shard {shard}, owned by {owner}");
|
||||
}
|
||||
debug!("dropping key {key} during compaction (it belongs on shard {owner})");
|
||||
}
|
||||
|
||||
if !new_layers.is_empty() {
|
||||
@@ -1746,24 +1752,6 @@ impl Timeline {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn compact_with_gc(
|
||||
self: &Arc<Self>,
|
||||
cancel: &CancellationToken,
|
||||
options: CompactOptions,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
self.partial_compact_with_gc(
|
||||
options
|
||||
.compact_range
|
||||
.map(|range| range.start..range.end)
|
||||
.unwrap_or_else(|| Key::MIN..Key::MAX),
|
||||
cancel,
|
||||
options.flags,
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// An experimental compaction building block that combines compaction with garbage collection.
|
||||
///
|
||||
/// The current implementation picks all delta + image layers that are below or intersecting with
|
||||
@@ -1771,17 +1759,19 @@ impl Timeline {
|
||||
/// layers and image layers, which generates image layers on the gc horizon, drop deltas below gc horizon,
|
||||
/// and create delta layers with all deltas >= gc horizon.
|
||||
///
|
||||
/// If `key_range` is provided, it will only compact the keys within the range, aka partial compaction.
|
||||
/// If `options.compact_range` is provided, it will only compact the keys within the range, aka partial compaction.
|
||||
/// Partial compaction will read and process all layers overlapping with the key range, even if it might
|
||||
/// contain extra keys. After the gc-compaction phase completes, delta layers that are not fully contained
|
||||
/// within the key range will be rewritten to ensure they do not overlap with the delta layers. Providing
|
||||
/// Key::MIN..Key..MAX to the function indicates a full compaction, though technically, `Key::MAX` is not
|
||||
/// part of the range.
|
||||
pub(crate) async fn partial_compact_with_gc(
|
||||
///
|
||||
/// If `options.compact_below_lsn` is provided, the compaction will only compact layers below or intersect with
|
||||
/// the LSN. Otherwise, it will use the gc cutoff by default.
|
||||
pub(crate) async fn compact_with_gc(
|
||||
self: &Arc<Self>,
|
||||
compaction_key_range: Range<Key>,
|
||||
cancel: &CancellationToken,
|
||||
flags: EnumSet<CompactFlags>,
|
||||
options: CompactOptions,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
// Block other compaction/GC tasks from running for now. GC-compaction could run along
|
||||
@@ -1803,6 +1793,12 @@ impl Timeline {
|
||||
)
|
||||
.await?;
|
||||
|
||||
let flags = options.flags;
|
||||
let compaction_key_range = options
|
||||
.compact_range
|
||||
.map(|range| range.start..range.end)
|
||||
.unwrap_or_else(|| Key::MIN..Key::MAX);
|
||||
|
||||
let dry_run = flags.contains(CompactFlags::DryRun);
|
||||
|
||||
if compaction_key_range == (Key::MIN..Key::MAX) {
|
||||
@@ -1826,7 +1822,18 @@ impl Timeline {
|
||||
let layers = guard.layer_map()?;
|
||||
let gc_info = self.gc_info.read().unwrap();
|
||||
let mut retain_lsns_below_horizon = Vec::new();
|
||||
let gc_cutoff = gc_info.cutoffs.select_min();
|
||||
let gc_cutoff = {
|
||||
let real_gc_cutoff = gc_info.cutoffs.select_min();
|
||||
// The compaction algorithm will keep all keys above the gc_cutoff while keeping only necessary keys below the gc_cutoff for
|
||||
// each of the retain_lsn. Therefore, if the user-provided `compact_below_lsn` is larger than the real gc cutoff, we will use
|
||||
// the real cutoff.
|
||||
let mut gc_cutoff = options.compact_below_lsn.unwrap_or(real_gc_cutoff);
|
||||
if gc_cutoff > real_gc_cutoff {
|
||||
warn!("provided compact_below_lsn={} is larger than the real_gc_cutoff={}, using the real gc cutoff", gc_cutoff, real_gc_cutoff);
|
||||
gc_cutoff = real_gc_cutoff;
|
||||
}
|
||||
gc_cutoff
|
||||
};
|
||||
for (lsn, _timeline_id, _is_offloaded) in &gc_info.retain_lsns {
|
||||
if lsn < &gc_cutoff {
|
||||
retain_lsns_below_horizon.push(*lsn);
|
||||
@@ -1846,7 +1853,7 @@ impl Timeline {
|
||||
.map(|desc| desc.get_lsn_range().end)
|
||||
.max()
|
||||
else {
|
||||
info!("no layers to compact with gc");
|
||||
info!("no layers to compact with gc: no historic layers below gc_cutoff, gc_cutoff={}", gc_cutoff);
|
||||
return Ok(());
|
||||
};
|
||||
// Then, pick all the layers that are below the max_layer_lsn. This is to ensure we can pick all single-key
|
||||
@@ -1869,7 +1876,7 @@ impl Timeline {
|
||||
}
|
||||
}
|
||||
if selected_layers.is_empty() {
|
||||
info!("no layers to compact with gc");
|
||||
info!("no layers to compact with gc: no layers within the key range, gc_cutoff={}, key_range={}..{}", gc_cutoff, compaction_key_range.start, compaction_key_range.end);
|
||||
return Ok(());
|
||||
}
|
||||
retain_lsns_below_horizon.sort();
|
||||
@@ -2048,6 +2055,11 @@ impl Timeline {
|
||||
// This is not handled in the filter iterator because shard is determined by hash.
|
||||
// Therefore, it does not give us any performance benefit to do things like skip
|
||||
// a whole layer file as handling key spaces (ranges).
|
||||
if cfg!(debug_assertions) {
|
||||
let shard = self.shard_identity.shard_index();
|
||||
let owner = self.shard_identity.get_shard_number(&key);
|
||||
panic!("key {key} does not belong on shard {shard}, owned by {owner}");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if !job_desc.compaction_key_range.contains(&key) {
|
||||
|
||||
@@ -582,18 +582,21 @@ impl WalIngest {
|
||||
forknum: FSM_FORKNUM,
|
||||
};
|
||||
|
||||
// Zero out the last remaining FSM page, if this shard owns it. We are not precise here,
|
||||
// and instead of digging in the FSM bitmap format we just clear the whole page.
|
||||
let fsm_logical_page_no = blkno / pg_constants::SLOTS_PER_FSM_PAGE;
|
||||
let mut fsm_physical_page_no = fsm_logical_to_physical(fsm_logical_page_no);
|
||||
if blkno % pg_constants::SLOTS_PER_FSM_PAGE != 0 {
|
||||
// Tail of last remaining FSM page has to be zeroed.
|
||||
// We are not precise here and instead of digging in FSM bitmap format just clear the whole page.
|
||||
if blkno % pg_constants::SLOTS_PER_FSM_PAGE != 0
|
||||
&& self
|
||||
.shard
|
||||
.is_key_local(&rel_block_to_key(rel, fsm_physical_page_no))
|
||||
{
|
||||
modification.put_rel_page_image_zero(rel, fsm_physical_page_no)?;
|
||||
fsm_physical_page_no += 1;
|
||||
}
|
||||
// TODO: re-examine the None case here wrt. sharding; should we error?
|
||||
// Truncate this shard's view of the FSM relation size, if it even has one.
|
||||
let nblocks = get_relsize(modification, rel, ctx).await?.unwrap_or(0);
|
||||
if nblocks > fsm_physical_page_no {
|
||||
// check if something to do: FSM is larger than truncate position
|
||||
self.put_rel_truncation(modification, rel, fsm_physical_page_no, ctx)
|
||||
.await?;
|
||||
}
|
||||
@@ -617,7 +620,7 @@ impl WalIngest {
|
||||
// tail bits in the last remaining map page, representing truncated heap
|
||||
// blocks, need to be cleared. This is not only tidy, but also necessary
|
||||
// because we don't get a chance to clear the bits if the heap is extended
|
||||
// again.
|
||||
// again. Only do this on the shard that owns the page.
|
||||
if (trunc_byte != 0 || trunc_offs != 0)
|
||||
&& self.shard.is_key_local(&rel_block_to_key(rel, vm_page_no))
|
||||
{
|
||||
@@ -631,10 +634,9 @@ impl WalIngest {
|
||||
)?;
|
||||
vm_page_no += 1;
|
||||
}
|
||||
// TODO: re-examine the None case here wrt. sharding; should we error?
|
||||
// Truncate this shard's view of the VM relation size, if it even has one.
|
||||
let nblocks = get_relsize(modification, rel, ctx).await?.unwrap_or(0);
|
||||
if nblocks > vm_page_no {
|
||||
// check if something to do: VM is larger than truncate position
|
||||
self.put_rel_truncation(modification, rel, vm_page_no, ctx)
|
||||
.await?;
|
||||
}
|
||||
|
||||
@@ -340,7 +340,7 @@ impl PoolingBackend {
|
||||
debug!("setting up backend session state");
|
||||
|
||||
// initiates the auth session
|
||||
if let Err(e) = client.execute("select auth.init()", &[]).await {
|
||||
if let Err(e) = client.batch_execute("select auth.init();").await {
|
||||
discard.discard();
|
||||
return Err(e.into());
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@ use smallvec::SmallVec;
|
||||
use tokio::net::TcpStream;
|
||||
use tokio::time::Instant;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{error, info, info_span, warn, Instrument};
|
||||
use tracing::{debug, error, info, info_span, Instrument};
|
||||
#[cfg(test)]
|
||||
use {
|
||||
super::conn_pool_lib::GlobalConnPoolOptions,
|
||||
@@ -125,13 +125,10 @@ pub(crate) fn poll_client<C: ClientInnerExt>(
|
||||
|
||||
match message {
|
||||
Some(Ok(AsyncMessage::Notice(notice))) => {
|
||||
info!(%session_id, "notice: {}", notice);
|
||||
debug!(%session_id, "notice: {}", notice);
|
||||
}
|
||||
Some(Ok(AsyncMessage::Notification(notif))) => {
|
||||
warn!(%session_id, pid = notif.process_id(), channel = notif.channel(), "notification received");
|
||||
}
|
||||
Some(Ok(_)) => {
|
||||
warn!(%session_id, "unknown message");
|
||||
debug!(%session_id, pid = notif.process_id(), channel = notif.channel(), "notification received");
|
||||
}
|
||||
Some(Err(e)) => {
|
||||
error!(%session_id, "connection error: {}", e);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use postgres_client::types::{Kind, Type};
|
||||
use postgres_client::Row;
|
||||
use postgres_client::{Column, Row};
|
||||
use serde_json::{Map, Value};
|
||||
|
||||
//
|
||||
@@ -77,14 +77,14 @@ pub(crate) enum JsonConversionError {
|
||||
//
|
||||
pub(crate) fn pg_text_row_to_json(
|
||||
row: &Row,
|
||||
columns: &[Type],
|
||||
columns: &[Column],
|
||||
c_types: &[Type],
|
||||
raw_output: bool,
|
||||
array_mode: bool,
|
||||
) -> Result<Value, JsonConversionError> {
|
||||
let iter = row
|
||||
.columns()
|
||||
let iter = columns
|
||||
.iter()
|
||||
.zip(columns)
|
||||
.zip(c_types)
|
||||
.enumerate()
|
||||
.map(|(i, (column, typ))| {
|
||||
let name = column.name();
|
||||
|
||||
@@ -23,14 +23,13 @@ use jose_jwk::jose_b64::base64ct::{Base64UrlUnpadded, Encoding};
|
||||
use p256::ecdsa::{Signature, SigningKey};
|
||||
use parking_lot::RwLock;
|
||||
use postgres_client::tls::NoTlsStream;
|
||||
use postgres_client::types::ToSql;
|
||||
use postgres_client::AsyncMessage;
|
||||
use serde_json::value::RawValue;
|
||||
use signature::Signer;
|
||||
use tokio::net::TcpStream;
|
||||
use tokio::time::Instant;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{debug, error, info, info_span, warn, Instrument};
|
||||
use tracing::{debug, error, info, info_span, Instrument};
|
||||
|
||||
use super::backend::HttpConnError;
|
||||
use super::conn_pool_lib::{
|
||||
@@ -229,13 +228,10 @@ pub(crate) fn poll_client<C: ClientInnerExt>(
|
||||
|
||||
match message {
|
||||
Some(Ok(AsyncMessage::Notice(notice))) => {
|
||||
info!(%session_id, "notice: {}", notice);
|
||||
debug!(%session_id, "notice: {}", notice);
|
||||
}
|
||||
Some(Ok(AsyncMessage::Notification(notif))) => {
|
||||
warn!(%session_id, pid = notif.process_id(), channel = notif.channel(), "notification received");
|
||||
}
|
||||
Some(Ok(_)) => {
|
||||
warn!(%session_id, "unknown message");
|
||||
debug!(%session_id, pid = notif.process_id(), channel = notif.channel(), "notification received");
|
||||
}
|
||||
Some(Err(e)) => {
|
||||
error!(%session_id, "connection error: {}", e);
|
||||
@@ -287,12 +283,11 @@ impl ClientInnerCommon<postgres_client::Client> {
|
||||
let token = resign_jwt(&local_data.key, payload, local_data.jti)?;
|
||||
|
||||
// initiates the auth session
|
||||
self.inner.batch_execute("discard all").await?;
|
||||
// the token contains only `[a-zA-Z1-9_-\.]+` so it cannot escape the string literal formatting.
|
||||
self.inner
|
||||
.execute(
|
||||
"select auth.jwt_session_init($1)",
|
||||
&[&&*token as &(dyn ToSql + Sync)],
|
||||
)
|
||||
.batch_execute(&format!(
|
||||
"discard all; select auth.jwt_session_init('{token}');"
|
||||
))
|
||||
.await?;
|
||||
|
||||
let pid = self.inner.get_process_id();
|
||||
|
||||
@@ -797,7 +797,13 @@ impl QueryData {
|
||||
let cancel_token = inner.cancel_token();
|
||||
|
||||
let res = match select(
|
||||
pin!(query_to_json(config, &*inner, self, &mut 0, parsed_headers)),
|
||||
pin!(query_to_json(
|
||||
config,
|
||||
&mut *inner,
|
||||
self,
|
||||
&mut 0,
|
||||
parsed_headers
|
||||
)),
|
||||
pin!(cancel.cancelled()),
|
||||
)
|
||||
.await
|
||||
@@ -881,7 +887,7 @@ impl BatchQueryData {
|
||||
builder = builder.deferrable(true);
|
||||
}
|
||||
|
||||
let transaction = builder.start().await.inspect_err(|_| {
|
||||
let mut transaction = builder.start().await.inspect_err(|_| {
|
||||
// if we cannot start a transaction, we should return immediately
|
||||
// and not return to the pool. connection is clearly broken
|
||||
discard.discard();
|
||||
@@ -890,7 +896,7 @@ impl BatchQueryData {
|
||||
let json_output = match query_batch(
|
||||
config,
|
||||
cancel.child_token(),
|
||||
&transaction,
|
||||
&mut transaction,
|
||||
self,
|
||||
parsed_headers,
|
||||
)
|
||||
@@ -934,7 +940,7 @@ impl BatchQueryData {
|
||||
async fn query_batch(
|
||||
config: &'static HttpConfig,
|
||||
cancel: CancellationToken,
|
||||
transaction: &Transaction<'_>,
|
||||
transaction: &mut Transaction<'_>,
|
||||
queries: BatchQueryData,
|
||||
parsed_headers: HttpHeaders,
|
||||
) -> Result<String, SqlOverHttpError> {
|
||||
@@ -972,7 +978,7 @@ async fn query_batch(
|
||||
|
||||
async fn query_to_json<T: GenericClient>(
|
||||
config: &'static HttpConfig,
|
||||
client: &T,
|
||||
client: &mut T,
|
||||
data: QueryData,
|
||||
current_size: &mut usize,
|
||||
parsed_headers: HttpHeaders,
|
||||
@@ -1027,7 +1033,7 @@ async fn query_to_json<T: GenericClient>(
|
||||
|
||||
let columns_len = row_stream.columns().len();
|
||||
let mut fields = Vec::with_capacity(columns_len);
|
||||
let mut columns = Vec::with_capacity(columns_len);
|
||||
let mut c_types = Vec::with_capacity(columns_len);
|
||||
|
||||
for c in row_stream.columns() {
|
||||
fields.push(json!({
|
||||
@@ -1039,7 +1045,7 @@ async fn query_to_json<T: GenericClient>(
|
||||
"dataTypeModifier": c.type_modifier(),
|
||||
"format": "text",
|
||||
}));
|
||||
columns.push(client.get_type(c.type_oid()).await?);
|
||||
c_types.push(client.get_type(c.type_oid()).await?);
|
||||
}
|
||||
|
||||
let array_mode = data.array_mode.unwrap_or(parsed_headers.default_array_mode);
|
||||
@@ -1047,7 +1053,15 @@ async fn query_to_json<T: GenericClient>(
|
||||
// convert rows to JSON
|
||||
let rows = rows
|
||||
.iter()
|
||||
.map(|row| pg_text_row_to_json(row, &columns, parsed_headers.raw_output, array_mode))
|
||||
.map(|row| {
|
||||
pg_text_row_to_json(
|
||||
row,
|
||||
row_stream.columns(),
|
||||
&c_types,
|
||||
parsed_headers.raw_output,
|
||||
array_mode,
|
||||
)
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
|
||||
// Resulting JSON format is based on the format of node-postgres result.
|
||||
|
||||
@@ -789,7 +789,7 @@ impl Service {
|
||||
node_list_futs.push({
|
||||
async move {
|
||||
tracing::info!("Scanning shards on node {node}...");
|
||||
let timeout = Duration::from_secs(1);
|
||||
let timeout = Duration::from_secs(5);
|
||||
let response = node
|
||||
.with_client_retries(
|
||||
|client| async move { client.list_location_config().await },
|
||||
|
||||
@@ -268,7 +268,7 @@ impl BucketConfig {
|
||||
config.bucket_name, config.bucket_region
|
||||
),
|
||||
RemoteStorageKind::AzureContainer(config) => format!(
|
||||
"bucket {}, storage account {:?}, region {}",
|
||||
"container {}, storage account {:?}, region {}",
|
||||
config.container_name, config.storage_account, config.container_region
|
||||
),
|
||||
}
|
||||
|
||||
@@ -152,6 +152,8 @@ PAGESERVER_PER_TENANT_METRICS: tuple[str, ...] = (
|
||||
"pageserver_resident_physical_size",
|
||||
"pageserver_io_operations_bytes_total",
|
||||
"pageserver_last_record_lsn",
|
||||
"pageserver_disk_consistent_lsn",
|
||||
"pageserver_projected_remote_consistent_lsn",
|
||||
"pageserver_standby_horizon",
|
||||
"pageserver_smgr_query_seconds_bucket",
|
||||
"pageserver_smgr_query_seconds_count",
|
||||
|
||||
@@ -850,6 +850,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
|
||||
force_repartition=False,
|
||||
force_image_layer_creation=False,
|
||||
force_l0_compaction=False,
|
||||
wait_until_flushed=True,
|
||||
wait_until_uploaded=False,
|
||||
compact: bool | None = None,
|
||||
**kwargs,
|
||||
@@ -862,6 +863,8 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
|
||||
query["force_image_layer_creation"] = "true"
|
||||
if force_l0_compaction:
|
||||
query["force_l0_compaction"] = "true"
|
||||
if not wait_until_flushed:
|
||||
query["wait_until_flushed"] = "false"
|
||||
if wait_until_uploaded:
|
||||
query["wait_until_uploaded"] = "true"
|
||||
|
||||
@@ -869,7 +872,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
|
||||
query["compact"] = "true" if compact else "false"
|
||||
|
||||
log.info(
|
||||
f"Requesting checkpoint: tenant {tenant_id}, timeline {timeline_id}, wait_until_uploaded={wait_until_uploaded}"
|
||||
f"Requesting checkpoint: tenant={tenant_id} timeline={timeline_id} wait_until_flushed={wait_until_flushed} wait_until_uploaded={wait_until_uploaded} compact={compact}"
|
||||
)
|
||||
res = self.put(
|
||||
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/checkpoint",
|
||||
|
||||
@@ -15,7 +15,7 @@ from fixtures.pageserver.http import PageserverApiException
|
||||
from fixtures.utils import skip_in_debug_build, wait_until
|
||||
from fixtures.workload import Workload
|
||||
|
||||
AGGRESIVE_COMPACTION_TENANT_CONF = {
|
||||
AGGRESSIVE_COMPACTION_TENANT_CONF = {
|
||||
# Disable gc and compaction. The test runs compaction manually.
|
||||
"gc_period": "0s",
|
||||
"compaction_period": "0s",
|
||||
@@ -24,6 +24,7 @@ AGGRESIVE_COMPACTION_TENANT_CONF = {
|
||||
# Compact small layers
|
||||
"compaction_target_size": 1024**2,
|
||||
"image_creation_threshold": 2,
|
||||
# "lsn_lease_length": "0s", -- TODO: would cause branch creation errors, should fix later
|
||||
}
|
||||
|
||||
|
||||
@@ -51,7 +52,7 @@ def test_pageserver_compaction_smoke(
|
||||
page_cache_size=10
|
||||
"""
|
||||
|
||||
env = neon_env_builder.init_start(initial_tenant_conf=AGGRESIVE_COMPACTION_TENANT_CONF)
|
||||
env = neon_env_builder.init_start(initial_tenant_conf=AGGRESSIVE_COMPACTION_TENANT_CONF)
|
||||
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id = env.initial_timeline
|
||||
@@ -120,14 +121,25 @@ page_cache_size=10
|
||||
assert vectored_average < 8
|
||||
|
||||
|
||||
@skip_in_debug_build("only run with release build")
|
||||
def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start(initial_tenant_conf=AGGRESIVE_COMPACTION_TENANT_CONF)
|
||||
SMOKE_CONF = {
|
||||
# Run both gc and gc-compaction.
|
||||
"gc_period": "5s",
|
||||
"compaction_period": "5s",
|
||||
# No PiTR interval and small GC horizon
|
||||
"pitr_interval": "0s",
|
||||
"gc_horizon": f"{1024 ** 2}",
|
||||
"lsn_lease_length": "0s",
|
||||
}
|
||||
|
||||
env = neon_env_builder.init_start(initial_tenant_conf=SMOKE_CONF)
|
||||
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id = env.initial_timeline
|
||||
|
||||
row_count = 1000
|
||||
churn_rounds = 10
|
||||
row_count = 10000
|
||||
churn_rounds = 50
|
||||
|
||||
ps_http = env.pageserver.http_client()
|
||||
|
||||
@@ -141,20 +153,27 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder):
|
||||
if i % 10 == 0:
|
||||
log.info(f"Running churn round {i}/{churn_rounds} ...")
|
||||
|
||||
workload.churn_rows(row_count, env.pageserver.id)
|
||||
# Force L0 compaction to ensure the number of layers is within bounds, so that gc-compaction can run.
|
||||
ps_http.timeline_compact(tenant_id, timeline_id, force_l0_compaction=True)
|
||||
assert ps_http.perf_info(tenant_id, timeline_id)[0]["num_of_l0"] <= 1
|
||||
ps_http.timeline_compact(
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
enhanced_gc_bottom_most_compaction=True,
|
||||
body={
|
||||
"start": "000000000000000000000000000000000000",
|
||||
"end": "030000000000000000000000000000000000",
|
||||
"scheduled": True,
|
||||
"compact_range": {
|
||||
"start": "000000000000000000000000000000000000",
|
||||
# skip the SLRU range for now -- it races with get-lsn-by-timestamp, TODO: fix this
|
||||
"end": "010000000000000000000000000000000000",
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
workload.churn_rows(row_count, env.pageserver.id)
|
||||
|
||||
# ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked)
|
||||
env.pageserver.assert_log_contains(
|
||||
"scheduled_compact_timeline.*picked .* layers for compaction"
|
||||
)
|
||||
|
||||
log.info("Validating at workload end ...")
|
||||
workload.validate(env.pageserver.id)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user