Compare commits

..

6 Commits

Author SHA1 Message Date
Stas Kelvich
57f51169ef Hacky support for a new connection router in link-proxy
Add a hack to support new way of accessing compute without using a
NodePort. Now to access compute in cross-k8s setup (console->compute
and link-proxy->compute) we need to connect to the pg_sni_router service
using a TLS. Destination compute address is encoded in domain/SNI.

However, for link-proxy it is hard add support for outgoing TLS connections
as our trick with stealing stream from tokio-postgres doesn't work with TLS.
So set sni_host option and use unencrupted connection instead. Once we add
encryption support for outgoing connections to the proxy, we can remove
this hack.
2023-04-27 14:50:52 +03:00
Christian Schwarz
9ea7b5dd38 clean up logging around on-demand downloads (#4030)
- Remove repeated tenant & timeline from span
- Demote logging of the path to debug level
- Log completion at info level, in the same function where we log errors
- distinguish between layer file download success & on-demand download
succeeding as a whole in the log message wording
- Assert that the span contains a tenant id and a timeline id

fixes https://github.com/neondatabase/neon/issues/3945

Before:

```
  INFO compaction_loop{tenant_id=$TENANT_ID}:compact_timeline{timeline=$TIMELINE_ID}:download_remote_layer{tenant_id=$TENANT_ID timeline_id=$TIMELINE_ID layer=000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000020C8A71-00000000020CAF91}: download complete: /storage/pageserver/data/tenants/$TENANT_ID/timelines/$TIMELINE_ID/000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000020C8A71-00000000020CAF91
  INFO compaction_loop{tenant_id=$TENANT_ID}:compact_timeline{timeline=$TIMELINE_ID}:download_remote_layer{tenant_id=$TENANT_ID timeline_id=$TIMELINE_ID layer=000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000020C8A71-00000000020CAF91}: Rebuilt layer map. Did 9 insertions to process a batch of 1 updates.
```

After:

```
  INFO compaction_loop{tenant_id=$TENANT_ID}:compact_timeline{timeline=$TIMELINE_ID}:download_remote_layer{layer=000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000020C8A71-00000000020CAF91}: layer file download finished
  INFO compaction_loop{tenant_id=$TENANT_ID}:compact_timeline{timeline=$TIMELINE_ID}:download_remote_layer{layer=000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000020C8A71-00000000020CAF91}: Rebuilt layer map. Did 9 insertions to process a batch of 1 updates.
  INFO compaction_loop{tenant_id=$TENANT_ID}:compact_timeline{timeline=$TIMELINE_ID}:download_remote_layer{layer=000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000020C8A71-00000000020CAF91}: on-demand download successful
```
2023-04-27 11:54:48 +02:00
Arseny Sher
0112a602e1 Add timeout on proxy -> compute connection establishment.
Otherwise we sit up to default tcp_syn_retries (about 2+ min) before gettings os
error 110 if compute has been migrated to another pod.
2023-04-27 09:50:52 +04:00
Anastasia Lubennikova
92214578af Fix proxy_io_bytes_per_client metric: use branch_id identifier properly. (#4084)
It fixes the miscalculation of the metric for projects that use multiple
branches for the same endpoint.
We were under billing users with such projects. So we need to
communicate the change in Release Notes.
2023-04-26 17:47:54 +03:00
Christian Schwarz
6861259be7 add global metric for unexpected on-demand downloads (#4069)
Until we have toned down the prod logs to zero WARN and ERROR, we want a
dedicated metric for which we can have a dedicated alert.

fixes https://github.com/neondatabase/neon/issues/3924
2023-04-26 15:18:26 +02:00
Sergey Melnikov
11df2ee5d7 Add safekeeper-3.us-east-2.aws.neon.build (#4085) 2023-04-26 14:40:36 +03:00
25 changed files with 471 additions and 134 deletions

View File

@@ -50,5 +50,7 @@ storage:
ansible_host: i-027662bd552bf5db0
safekeeper-2.us-east-2.aws.neon.build:
ansible_host: i-0de0b03a51676a6ce
safekeeper-3.us-east-2.aws.neon.build:
ansible_host: i-05f8ba2cda243bd18
safekeeper-99.us-east-2.aws.neon.build:
ansible_host: i-0d61b6a2ea32028d5

11
Cargo.lock generated
View File

@@ -4629,6 +4629,16 @@ dependencies = [
"valuable",
]
[[package]]
name = "tracing-error"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d686ec1c0f384b1277f097b2f279a2ecc11afe8c133c1aabf036a27cb4cd206e"
dependencies = [
"tracing",
"tracing-subscriber",
]
[[package]]
name = "tracing-futures"
version = "0.2.5"
@@ -4879,6 +4889,7 @@ dependencies = [
"thiserror",
"tokio",
"tracing",
"tracing-error",
"tracing-subscriber",
"url",
"uuid",

View File

@@ -110,6 +110,7 @@ toml = "0.7"
toml_edit = "0.19"
tonic = {version = "0.9", features = ["tls", "tls-roots"]}
tracing = "0.1"
tracing-error = "0.2.0"
tracing-opentelemetry = "0.18.0"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
url = "2.2"

View File

@@ -99,7 +99,11 @@ struct S3WithTestBlobs {
#[async_trait::async_trait]
impl AsyncTestContext for MaybeEnabledS3 {
async fn setup() -> Self {
utils::logging::init(utils::logging::LogFormat::Test).expect("logging init failed");
utils::logging::init(
utils::logging::LogFormat::Test,
utils::logging::TracingErrorLayerEnablement::Disabled,
)
.expect("logging init failed");
if env::var(ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME).is_err() {
info!(
"`{}` env variable is not set, skipping the test",

View File

@@ -27,7 +27,8 @@ signal-hook.workspace = true
thiserror.workspace = true
tokio.workspace = true
tracing.workspace = true
tracing-subscriber = { workspace = true, features = ["json"] }
tracing-error.workspace = true
tracing-subscriber = { workspace = true, features = ["json", "registry"] }
rand.workspace = true
serde_with.workspace = true
strum.workspace = true

View File

@@ -54,6 +54,8 @@ pub mod measured_stream;
pub mod serde_percent;
pub mod serde_regex;
pub mod tracing_span_assert;
/// use with fail::cfg("$name", "return(2000)")
#[macro_export]
macro_rules! failpoint_sleep_millis_async {

View File

@@ -56,7 +56,20 @@ where
}
}
pub fn init(log_format: LogFormat) -> anyhow::Result<()> {
/// Whether to add the `tracing_error` crate's `ErrorLayer`
/// to the global tracing subscriber.
///
pub enum TracingErrorLayerEnablement {
/// Do not add the `ErrorLayer`.
Disabled,
/// Add the `ErrorLayer` with the filter specified by RUST_LOG, defaulting to `info` if `RUST_LOG` is unset.
EnableWithRustLogFilter,
}
pub fn init(
log_format: LogFormat,
tracing_error_layer_enablement: TracingErrorLayerEnablement,
) -> anyhow::Result<()> {
// We fall back to printing all spans at info-level or above if
// the RUST_LOG environment variable is not set.
let rust_log_env_filter = || {
@@ -67,21 +80,26 @@ pub fn init(log_format: LogFormat) -> anyhow::Result<()> {
// NB: the order of the with() calls does not matter.
// See https://docs.rs/tracing-subscriber/0.3.16/tracing_subscriber/layer/index.html#per-layer-filtering
use tracing_subscriber::prelude::*;
tracing_subscriber::registry()
.with({
let log_layer = tracing_subscriber::fmt::layer()
.with_target(false)
.with_ansi(atty::is(atty::Stream::Stdout))
.with_writer(std::io::stdout);
let log_layer = match log_format {
LogFormat::Json => log_layer.json().boxed(),
LogFormat::Plain => log_layer.boxed(),
LogFormat::Test => log_layer.with_test_writer().boxed(),
};
log_layer.with_filter(rust_log_env_filter())
})
.with(TracingEventCountLayer(&TRACING_EVENT_COUNT).with_filter(rust_log_env_filter()))
.init();
let r = tracing_subscriber::registry();
let r = r.with({
let log_layer = tracing_subscriber::fmt::layer()
.with_target(false)
.with_ansi(atty::is(atty::Stream::Stdout))
.with_writer(std::io::stdout);
let log_layer = match log_format {
LogFormat::Json => log_layer.json().boxed(),
LogFormat::Plain => log_layer.boxed(),
LogFormat::Test => log_layer.with_test_writer().boxed(),
};
log_layer.with_filter(rust_log_env_filter())
});
let r = r.with(TracingEventCountLayer(&TRACING_EVENT_COUNT).with_filter(rust_log_env_filter()));
match tracing_error_layer_enablement {
TracingErrorLayerEnablement::EnableWithRustLogFilter => r
.with(tracing_error::ErrorLayer::default().with_filter(rust_log_env_filter()))
.init(),
TracingErrorLayerEnablement::Disabled => r.init(),
}
Ok(())
}

View File

@@ -0,0 +1,287 @@
//! Assert that the current [`tracing::Span`] has a given set of fields.
//!
//! # Usage
//!
//! ```
//! use tracing_subscriber::prelude::*;
//! let registry = tracing_subscriber::registry()
//! .with(tracing_error::ErrorLayer::default());
//!
//! // Register the registry as the global subscriber.
//! // In this example, we'll only use it as a thread-local subscriber.
//! let _guard = tracing::subscriber::set_default(registry);
//!
//! // Then, in the main code:
//!
//! let span = tracing::info_span!("TestSpan", test_id = 1);
//! let _guard = span.enter();
//!
//! // ... down the call stack
//!
//! use utils::tracing_span_assert::{check_fields_present, MultiNameExtractor};
//! let extractor = MultiNameExtractor::new("TestExtractor", ["test", "test_id"]);
//! match check_fields_present([&extractor]) {
//! Ok(()) => {},
//! Err(missing) => {
//! panic!("Missing fields: {:?}", missing.into_iter().map(|f| f.name() ).collect::<Vec<_>>());
//! }
//! }
//! ```
//!
//! Recommended reading: https://docs.rs/tracing-subscriber/0.3.16/tracing_subscriber/layer/index.html#per-layer-filtering
//!
use std::{
collections::HashSet,
fmt::{self},
hash::{Hash, Hasher},
};
pub enum ExtractionResult {
Present,
Absent,
}
pub trait Extractor: Send + Sync + std::fmt::Debug {
fn name(&self) -> &str;
fn extract(&self, fields: &tracing::field::FieldSet) -> ExtractionResult;
}
#[derive(Debug)]
pub struct MultiNameExtractor<const L: usize> {
name: &'static str,
field_names: [&'static str; L],
}
impl<const L: usize> MultiNameExtractor<L> {
pub fn new(name: &'static str, field_names: [&'static str; L]) -> MultiNameExtractor<L> {
MultiNameExtractor { name, field_names }
}
}
impl<const L: usize> Extractor for MultiNameExtractor<L> {
fn name(&self) -> &str {
self.name
}
fn extract(&self, fields: &tracing::field::FieldSet) -> ExtractionResult {
if fields.iter().any(|f| self.field_names.contains(&f.name())) {
ExtractionResult::Present
} else {
ExtractionResult::Absent
}
}
}
struct MemoryIdentity<'a>(&'a dyn Extractor);
impl<'a> MemoryIdentity<'a> {
fn as_ptr(&self) -> *const () {
self.0 as *const _ as *const ()
}
}
impl<'a> PartialEq for MemoryIdentity<'a> {
fn eq(&self, other: &Self) -> bool {
self.as_ptr() == other.as_ptr()
}
}
impl<'a> Eq for MemoryIdentity<'a> {}
impl<'a> Hash for MemoryIdentity<'a> {
fn hash<H: Hasher>(&self, state: &mut H) {
self.as_ptr().hash(state);
}
}
impl<'a> fmt::Debug for MemoryIdentity<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:p}: {}", self.as_ptr(), self.0.name())
}
}
/// The extractor names passed as keys to [`new`].
pub fn check_fields_present<const L: usize>(
must_be_present: [&dyn Extractor; L],
) -> Result<(), Vec<&dyn Extractor>> {
let mut missing: HashSet<MemoryIdentity> =
HashSet::from_iter(must_be_present.into_iter().map(|r| MemoryIdentity(r)));
let trace = tracing_error::SpanTrace::capture();
trace.with_spans(|md, _formatted_fields| {
missing.retain(|extractor| match extractor.0.extract(md.fields()) {
ExtractionResult::Present => false,
ExtractionResult::Absent => true,
});
!missing.is_empty() // continue walking up until we've found all missing
});
if missing.is_empty() {
Ok(())
} else {
Err(missing.into_iter().map(|mi| mi.0).collect())
}
}
#[cfg(test)]
mod tests {
use tracing_subscriber::prelude::*;
use super::*;
struct Setup {
_current_thread_subscriber_guard: tracing::subscriber::DefaultGuard,
tenant_extractor: MultiNameExtractor<2>,
timeline_extractor: MultiNameExtractor<2>,
}
fn setup_current_thread() -> Setup {
let tenant_extractor = MultiNameExtractor::new("TenantId", ["tenant_id", "tenant"]);
let timeline_extractor = MultiNameExtractor::new("TimelineId", ["timeline_id", "timeline"]);
let registry = tracing_subscriber::registry()
.with(tracing_subscriber::fmt::layer())
.with(tracing_error::ErrorLayer::default());
let guard = tracing::subscriber::set_default(registry);
Setup {
_current_thread_subscriber_guard: guard,
tenant_extractor,
timeline_extractor,
}
}
fn assert_missing(missing: Vec<&dyn Extractor>, expected: Vec<&dyn Extractor>) {
let missing: HashSet<MemoryIdentity> =
HashSet::from_iter(missing.into_iter().map(MemoryIdentity));
let expected: HashSet<MemoryIdentity> =
HashSet::from_iter(expected.into_iter().map(MemoryIdentity));
assert_eq!(missing, expected);
}
#[test]
fn positive_one_level() {
let setup = setup_current_thread();
let span = tracing::info_span!("root", tenant_id = "tenant-1", timeline_id = "timeline-1");
let _guard = span.enter();
check_fields_present([&setup.tenant_extractor, &setup.timeline_extractor]).unwrap();
}
#[test]
fn negative_one_level() {
let setup = setup_current_thread();
let span = tracing::info_span!("root", timeline_id = "timeline-1");
let _guard = span.enter();
let missing =
check_fields_present([&setup.tenant_extractor, &setup.timeline_extractor]).unwrap_err();
assert_missing(missing, vec![&setup.tenant_extractor]);
}
#[test]
fn positive_multiple_levels() {
let setup = setup_current_thread();
let span = tracing::info_span!("root");
let _guard = span.enter();
let span = tracing::info_span!("child", tenant_id = "tenant-1");
let _guard = span.enter();
let span = tracing::info_span!("grandchild", timeline_id = "timeline-1");
let _guard = span.enter();
check_fields_present([&setup.tenant_extractor, &setup.timeline_extractor]).unwrap();
}
#[test]
fn negative_multiple_levels() {
let setup = setup_current_thread();
let span = tracing::info_span!("root");
let _guard = span.enter();
let span = tracing::info_span!("child", timeline_id = "timeline-1");
let _guard = span.enter();
let missing = check_fields_present([&setup.tenant_extractor]).unwrap_err();
assert_missing(missing, vec![&setup.tenant_extractor]);
}
#[test]
fn positive_subset_one_level() {
let setup = setup_current_thread();
let span = tracing::info_span!("root", tenant_id = "tenant-1", timeline_id = "timeline-1");
let _guard = span.enter();
check_fields_present([&setup.tenant_extractor]).unwrap();
}
#[test]
fn positive_subset_multiple_levels() {
let setup = setup_current_thread();
let span = tracing::info_span!("root");
let _guard = span.enter();
let span = tracing::info_span!("child", tenant_id = "tenant-1");
let _guard = span.enter();
let span = tracing::info_span!("grandchild", timeline_id = "timeline-1");
let _guard = span.enter();
check_fields_present([&setup.tenant_extractor]).unwrap();
}
#[test]
fn negative_subset_one_level() {
let setup = setup_current_thread();
let span = tracing::info_span!("root", timeline_id = "timeline-1");
let _guard = span.enter();
let missing = check_fields_present([&setup.tenant_extractor]).unwrap_err();
assert_missing(missing, vec![&setup.tenant_extractor]);
}
#[test]
fn negative_subset_multiple_levels() {
let setup = setup_current_thread();
let span = tracing::info_span!("root");
let _guard = span.enter();
let span = tracing::info_span!("child", timeline_id = "timeline-1");
let _guard = span.enter();
let missing = check_fields_present([&setup.tenant_extractor]).unwrap_err();
assert_missing(missing, vec![&setup.tenant_extractor]);
}
#[test]
fn tracing_error_subscriber_not_set_up() {
// no setup
let span = tracing::info_span!("foo", e = "some value");
let _guard = span.enter();
let extractor = MultiNameExtractor::new("E", ["e"]);
let missing = check_fields_present([&extractor]).unwrap_err();
assert_missing(missing, vec![&extractor]);
}
#[test]
#[should_panic]
fn panics_if_tracing_error_subscriber_has_wrong_filter() {
let r = tracing_subscriber::registry().with({
tracing_error::ErrorLayer::default().with_filter(
tracing_subscriber::filter::dynamic_filter_fn(|md, _| {
if md.is_span() && *md.level() == tracing::Level::INFO {
return false;
}
true
}),
)
});
let _guard = tracing::subscriber::set_default(r);
let span = tracing::info_span!("foo", e = "some value");
let _guard = span.enter();
let extractor = MultiNameExtractor::new("E", ["e"]);
let missing = check_fields_present([&extractor]).unwrap_err();
assert_missing(missing, vec![&extractor]);
}
}

View File

@@ -25,6 +25,7 @@ use pageserver::{
virtual_file,
};
use postgres_backend::AuthType;
use utils::logging::TracingErrorLayerEnablement;
use utils::signals::ShutdownSignals;
use utils::{
auth::JwtAuth, logging, project_git_version, sentry_init::init_sentry, signals::Signal,
@@ -86,8 +87,19 @@ fn main() -> anyhow::Result<()> {
}
};
// Initialize logging, which must be initialized before the custom panic hook is installed.
logging::init(conf.log_format)?;
// Initialize logging.
//
// It must be initialized before the custom panic hook is installed below.
//
// Regarding tracing_error enablement: at this time, we only use the
// tracing_error crate to debug_assert that log spans contain tenant and timeline ids.
// See `debug_assert_current_span_has_tenant_and_timeline_id` in the timeline module
let tracing_error_layer_enablement = if cfg!(debug_assertions) {
TracingErrorLayerEnablement::EnableWithRustLogFilter
} else {
TracingErrorLayerEnablement::Disabled
};
logging::init(conf.log_format, tracing_error_layer_enablement)?;
// mind the order required here: 1. logging, 2. panic_hook, 3. sentry.
// disarming this hook on pageserver, because we never tear down tracing.
@@ -226,6 +238,7 @@ fn start_pageserver(
);
set_build_info_metric(GIT_VERSION);
set_launch_timestamp_metric(launch_ts);
pageserver::preinitialize_metrics();
// If any failpoints were set from FAILPOINTS environment variable,
// print them to the log for debugging purposes

View File

@@ -44,6 +44,8 @@ pub const DELTA_FILE_MAGIC: u16 = 0x5A61;
static ZERO_PAGE: bytes::Bytes = bytes::Bytes::from_static(&[0u8; 8192]);
pub use crate::metrics::preinitialize_metrics;
pub async fn shutdown_pageserver(exit_code: i32) {
// Shut down the libpq endpoint task. This prevents new connections from
// being accepted.

View File

@@ -205,6 +205,15 @@ static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy<IntCounterVec> = Lazy::new(||
.expect("failed to define a metric")
});
pub static UNEXPECTED_ONDEMAND_DOWNLOADS: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
"pageserver_unexpected_ondemand_downloads_count",
"Number of unexpected on-demand downloads. \
We log more context for each increment, so, forgo any labels in this metric.",
)
.expect("failed to define a metric")
});
/// Each [`Timeline`]'s [`EVICTIONS_WITH_LOW_RESIDENCE_DURATION`] metric.
#[derive(Debug)]
pub struct EvictionsWithLowResidenceDuration {
@@ -1132,3 +1141,10 @@ impl<F: Future<Output = Result<O, E>>, O, E> Future for MeasuredRemoteOp<F> {
poll_result
}
}
pub fn preinitialize_metrics() {
// We want to alert on this metric increasing.
// Initialize it eagerly, so that our alert rule can distinguish absence of the metric from metric value 0.
assert_eq!(UNEXPECTED_ONDEMAND_DOWNLOADS.get(), 0);
UNEXPECTED_ONDEMAND_DOWNLOADS.reset();
}

View File

@@ -2886,7 +2886,13 @@ pub mod harness {
};
LOG_HANDLE.get_or_init(|| {
logging::init(logging::LogFormat::Test).expect("Failed to init test logging")
logging::init(
logging::LogFormat::Test,
// enable it in case in case the tests exercise code paths that use
// debug_assert_current_span_has_tenant_and_timeline_id
logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,
)
.expect("Failed to init test logging")
});
let repo_dir = PageServerConf::test_repo_dir(test_name);

View File

@@ -16,6 +16,7 @@ use tracing::{info, warn};
use crate::config::PageServerConf;
use crate::tenant::storage_layer::LayerFileName;
use crate::tenant::timeline::debug_assert_current_span_has_tenant_and_timeline_id;
use crate::{exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS};
use remote_storage::{DownloadError, GenericRemoteStorage};
use utils::crashsafe::path_with_suffix_extension;
@@ -43,6 +44,8 @@ pub async fn download_layer_file<'a>(
layer_file_name: &'a LayerFileName,
layer_metadata: &'a LayerFileMetadata,
) -> Result<u64, DownloadError> {
debug_assert_current_span_has_tenant_and_timeline_id();
let timeline_path = conf.timeline_path(&timeline_id, &tenant_id);
let local_path = timeline_path.join(layer_file_name.file_name());
@@ -154,7 +157,7 @@ pub async fn download_layer_file<'a>(
.with_context(|| format!("Could not fsync layer file {}", local_path.display(),))
.map_err(DownloadError::Other)?;
tracing::info!("download complete: {}", local_path.display());
tracing::debug!("download complete: {}", local_path.display());
Ok(bytes_amount)
}

View File

@@ -19,6 +19,7 @@ use tokio::sync::{oneshot, watch, Semaphore, TryAcquireError};
use tokio_util::sync::CancellationToken;
use tracing::*;
use utils::id::TenantTimelineId;
use utils::tracing_span_assert;
use std::cmp::{max, min, Ordering};
use std::collections::BinaryHeap;
@@ -48,7 +49,7 @@ use crate::tenant::{
use crate::config::PageServerConf;
use crate::keyspace::{KeyPartitioning, KeySpace};
use crate::metrics::TimelineMetrics;
use crate::metrics::{TimelineMetrics, UNEXPECTED_ONDEMAND_DOWNLOADS};
use crate::pgdatadir_mapping::LsnForTimestamp;
use crate::pgdatadir_mapping::{is_rel_fsm_block_key, is_rel_vm_block_key};
use crate::pgdatadir_mapping::{BlockNumber, CalculateLogicalSizeError};
@@ -936,6 +937,7 @@ impl Timeline {
}
}
#[instrument(skip_all, fields(tenant = %self.tenant_id, timeline = %self.timeline_id))]
pub async fn download_layer(&self, layer_file_name: &str) -> anyhow::Result<Option<bool>> {
let Some(layer) = self.find_layer(layer_file_name) else { return Ok(None) };
let Some(remote_layer) = layer.downcast_remote_layer() else { return Ok(Some(false)) };
@@ -2355,6 +2357,7 @@ impl Timeline {
id,
ctx.task_kind()
);
UNEXPECTED_ONDEMAND_DOWNLOADS.inc();
timeline.download_remote_layer(remote_layer).await?;
continue 'layer_map_search;
}
@@ -3818,11 +3821,13 @@ impl Timeline {
/// If the caller has a deadline or needs a timeout, they can simply stop polling:
/// we're **cancellation-safe** because the download happens in a separate task_mgr task.
/// So, the current download attempt will run to completion even if we stop polling.
#[instrument(skip_all, fields(tenant_id=%self.tenant_id, timeline_id=%self.timeline_id, layer=%remote_layer.short_id()))]
#[instrument(skip_all, fields(layer=%remote_layer.short_id()))]
pub async fn download_remote_layer(
&self,
remote_layer: Arc<RemoteLayer>,
) -> anyhow::Result<()> {
debug_assert_current_span_has_tenant_and_timeline_id();
use std::sync::atomic::Ordering::Relaxed;
let permit = match Arc::clone(&remote_layer.ongoing_download)
@@ -3866,6 +3871,8 @@ impl Timeline {
.await;
if let Ok(size) = &result {
info!("layer file download finished");
// XXX the temp file is still around in Err() case
// and consumes space until we clean up upon pageserver restart.
self_clone.metrics.resident_physical_size_gauge.add(*size);
@@ -3937,6 +3944,8 @@ impl Timeline {
updates.flush();
drop(layers);
info!("on-demand download successful");
// Now that we've inserted the download into the layer map,
// close the semaphore. This will make other waiters for
// this download return Ok(()).
@@ -3944,7 +3953,7 @@ impl Timeline {
remote_layer.ongoing_download.close();
} else {
// Keep semaphore open. We'll drop the permit at the end of the function.
error!("on-demand download failed: {:?}", result.as_ref().unwrap_err());
error!("layer file download failed: {:?}", result.as_ref().unwrap_err());
}
// Don't treat it as an error if the task that triggered the download
@@ -4255,3 +4264,30 @@ fn rename_to_backup(path: &Path) -> anyhow::Result<()> {
bail!("couldn't find an unused backup number for {:?}", path)
}
#[inline]
pub(crate) fn debug_assert_current_span_has_tenant_and_timeline_id() {
pub static TENANT_ID_EXTRACTOR: once_cell::sync::Lazy<
tracing_span_assert::MultiNameExtractor<2>,
> = once_cell::sync::Lazy::new(|| {
tracing_span_assert::MultiNameExtractor::new("TenantId", ["tenant_id", "tenant"])
});
pub static TIMELINE_ID_EXTRACTOR: once_cell::sync::Lazy<
tracing_span_assert::MultiNameExtractor<2>,
> = once_cell::sync::Lazy::new(|| {
tracing_span_assert::MultiNameExtractor::new("TimelineId", ["timeline_id", "timeline"])
});
#[cfg(debug_assertions)]
match tracing_span_assert::check_fields_present([
&*TENANT_ID_EXTRACTOR,
&*TIMELINE_ID_EXTRACTOR,
]) {
Ok(()) => (),
Err(missing) => panic!(
"missing extractors: {:?}",
missing.into_iter().map(|e| e.name()).collect::<Vec<_>>()
),
}
}

View File

@@ -348,7 +348,7 @@ impl ConnectionManagerState {
.context("walreceiver connection handling failure")
}
.instrument(
info_span!("walreceiver_connection", id = %id, node_id = %new_sk.safekeeper_id),
info_span!("walreceiver_connection", tenant_id = %id.tenant_id, timeline_id = %id.timeline_id, node_id = %new_sk.safekeeper_id),
)
});

View File

@@ -32,11 +32,3 @@ CREATE VIEW local_cache AS
SELECT P.* FROM local_cache_pages() AS P
(pageoffs int8, relfilenode oid, reltablespace oid, reldatabase oid,
relforknumber int2, relblocknumber int8, accesscount int4);
create table neon_prepared_statements(
client_id text not null,
stmt_name text not null,
stmt_body text not null,
from_sql boolean not null,
primary key(client_id, stmt_name)
);

View File

@@ -13,15 +13,12 @@
#include "access/xact.h"
#include "access/xlog.h"
#include "commands/prepare.h"
#include "executor/spi.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "catalog/pg_type.h"
#include "replication/walsender.h"
#include "funcapi.h"
#include "access/htup_details.h"
#include "utils/builtins.h"
#include "utils/pg_lsn.h"
#include "utils/guc.h"
@@ -31,31 +28,12 @@
PG_MODULE_MAGIC;
void _PG_init(void);
static char* neon_load_prepared_statement(char const* stmt_name, bool* from_sql);
static void neon_save_prepared_statement(char const* stmt_name, char const* stmt_body, bool from_sql);
static bool neon_drop_prepared_statement(char const* stmt_name);
static bool save_parepared_statememts;
void
_PG_init(void)
{
pg_init_libpagestore();
pg_init_walproposer();
DefineCustomBoolVariable("neon.save_prepared_statements",
"Support prepared statements in case of using connetion pooler",
NULL,
&save_parepared_statememts,
false, /* disabled by default */
PGC_POSTMASTER,
0,
NULL,
NULL,
NULL);
save_prepared_statement_hook = neon_save_prepared_statement;
load_prepared_statement_hook = neon_load_prepared_statement;
drop_prepared_statement_hook = neon_drop_prepared_statement;
EmitWarningsOnPlaceholders("neon");
}
@@ -107,72 +85,3 @@ backpressure_throttling_time(PG_FUNCTION_ARGS)
{
PG_RETURN_UINT64(BackpressureThrottlingTime());
}
static char*
neon_load_prepared_statement(char const* stmt_name, bool* from_sql)
{
char* stmt_body = NULL;
if (save_parepared_statememts)
{
int rc;
Oid param_types[2] = {TEXTOID, TEXTOID};
Datum param_values[2] = {CStringGetTextDatum(application_name), CStringGetTextDatum(stmt_name)};
bool is_null;
MemoryContext call_ctx = CurrentMemoryContext;
SPI_connect();
rc = SPI_execute_with_args("select stmt_body,from_sql from neon_prepared_statements where client_id=$1 and stmt_name=$2",
2, param_types, param_values, NULL, true, 1);
if (rc != SPI_OK_SELECT || SPI_processed != 1) {
SPI_finish();
elog(LOG, "Prepared statement %s not found for client %s", stmt_name, application_name);
return NULL;
}
stmt_body = SPI_getvalue(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1);
stmt_body = MemoryContextStrdup(call_ctx, stmt_body);
*from_sql = DatumGetBool(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &is_null));
SPI_finish();
}
return stmt_body;
}
static void
neon_save_prepared_statement(char const* stmt_name, char const* stmt_body, bool from_sql)
{
if (save_parepared_statememts)
{
int rc;
Oid param_types[4] = {TEXTOID, TEXTOID, TEXTOID, BOOLOID};
Datum param_values[4] = {CStringGetTextDatum(application_name), CStringGetTextDatum(stmt_name), CStringGetTextDatum(stmt_body), BoolGetDatum(from_sql)};
SPI_connect();
rc = SPI_execute_with_args("insert into neon_prepared_statements values($1,$2,$3,$4) on conflict (client_id,stmt_name) do update set stmt_body=EXCLUDED.stmt_body, from_sql=EXCLUDED.from_sql",
4, param_types, param_values, NULL, false, 1);
if (rc != SPI_OK_INSERT && rc != SPI_OK_UPDATE)
elog(LOG, "Failed to persist prepared statement %s for client %s", stmt_name, application_name);
SPI_finish();
}
}
static bool
neon_drop_prepared_statement(char const* stmt_name)
{
if (save_parepared_statememts)
{
int rc;
Oid param_types[2] = {TEXTOID, TEXTOID};
Datum param_values[2] = {CStringGetTextDatum(application_name), CStringGetTextDatum(stmt_name)};
SPI_connect();
rc = SPI_execute_with_args("delete from neon_prepared_statements where client_id=$1 and stmt_name=$2",
2, param_types, param_values, NULL, false, 1);
if (rc != SPI_OK_DELETE || SPI_processed != 1) {
SPI_finish();
elog(LOG, "Prepared statement %s not found for client %s", stmt_name, application_name);
return false;
}
SPI_finish();
return true;
}
return false;
}

View File

@@ -87,6 +87,20 @@ pub(super) async fn authenticate(
.dbname(&db_info.dbname)
.user(&db_info.user);
// That is a hack to support new way of accessing compute without using a
// NodePort. Now to access compute in cross-k8s setup (console->compute
// and link-proxy->compute) we need to connect to the pg_sni_router service
// using a TLS. Destination compute address is encoded in domain/SNI.
//
// However, for link-proxy it is hard add support for outgoing TLS connections
// as our trick with stealing stream from tokio-postgres doesn't work with TLS.
// So set sni_host option and use unencrupted connection instead. Once we add
// encryption support for outgoing connections to the proxy, we can remove
// this hack.
if db_info.host.contains("cluster.local") {
config.options(format!("sni_host={}", db_info.host).as_str());
}
if let Some(password) = db_info.password {
config.password(password.as_ref());
}

View File

@@ -1,8 +1,8 @@
use crate::{cancellation::CancelClosure, error::UserFacingError};
use futures::TryFutureExt;
use futures::{FutureExt, TryFutureExt};
use itertools::Itertools;
use pq_proto::StartupMessageParams;
use std::{io, net::SocketAddr};
use std::{io, net::SocketAddr, time::Duration};
use thiserror::Error;
use tokio::net::TcpStream;
use tokio_postgres::NoTls;
@@ -130,9 +130,23 @@ impl ConnCfg {
async fn connect_raw(&self) -> io::Result<(SocketAddr, TcpStream)> {
use tokio_postgres::config::Host;
// wrap TcpStream::connect with timeout
let connect_with_timeout = |host, port| {
let connection_timeout = Duration::from_millis(10000);
tokio::time::timeout(connection_timeout, TcpStream::connect((host, port))).map(
move |res| match res {
Ok(tcpstream_connect_res) => tcpstream_connect_res,
Err(_) => Err(io::Error::new(
io::ErrorKind::TimedOut,
format!("exceeded connection timeout {connection_timeout:?}"),
)),
},
)
};
let connect_once = |host, port| {
info!("trying to connect to compute node at {host}:{port}");
TcpStream::connect((host, port)).and_then(|socket| async {
connect_with_timeout(host, port).and_then(|socket| async {
let socket_addr = socket.peer_addr()?;
// This prevents load balancer from severing the connection.
socket2::SockRef::from(&socket).set_keepalive(true)?;
@@ -165,7 +179,6 @@ impl ConnCfg {
Host::Unix(_) => continue, // unix sockets are not welcome here
};
// TODO: maybe we should add a timeout.
match connect_once(host, *port).await {
Ok(socket) => return Ok(socket),
Err(err) => {

View File

@@ -95,7 +95,7 @@ fn gather_proxy_io_bytes_per_client() -> Vec<(Ids, (u64, DateTime<Utc>))> {
current_metrics.push((
Ids {
endpoint_id: endpoint_id.to_string(),
branch_id: "".to_string(),
branch_id: branch_id.to_string(),
},
(value, Utc::now()),
));

View File

@@ -134,7 +134,10 @@ fn main() -> anyhow::Result<()> {
// 1. init logging
// 2. tracing panic hook
// 3. sentry
logging::init(LogFormat::from_config(&args.log_format)?)?;
logging::init(
LogFormat::from_config(&args.log_format)?,
logging::TracingErrorLayerEnablement::Disabled,
)?;
logging::replace_panic_hook_with_tracing_panic_hook().forget();
info!("version: {GIT_VERSION}");

View File

@@ -430,7 +430,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
// 1. init logging
// 2. tracing panic hook
// 3. sentry
logging::init(LogFormat::from_config(&args.log_format)?)?;
logging::init(
LogFormat::from_config(&args.log_format)?,
logging::TracingErrorLayerEnablement::Disabled,
)?;
logging::replace_panic_hook_with_tracing_panic_hook().forget();
// initialize sentry if SENTRY_DSN is provided
let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);

View File

@@ -53,6 +53,7 @@ PAGESERVER_GLOBAL_METRICS: Tuple[str, ...] = (
"pageserver_storage_operations_seconds_global_count",
"pageserver_storage_operations_seconds_global_sum",
"pageserver_storage_operations_seconds_global_bucket",
"pageserver_unexpected_ondemand_downloads_count_total",
"libmetrics_launch_timestamp",
"libmetrics_build_info",
"libmetrics_tracing_event_count_total",