Compare commits

..

3 Commits

Author SHA1 Message Date
Sasha Krassovsky
a9d281ddbf wtf is cargo hakari?! 2024-06-17 16:31:36 -07:00
Sasha Krassovsky
e709848629 Make the code completely unreadable 2024-06-17 15:53:14 -07:00
Sasha Krassovsky
7e9a03505d Make activity monitor take pg_stat_wal_receiver into account 2024-06-17 15:51:05 -07:00
16 changed files with 105 additions and 274 deletions

View File

@@ -299,21 +299,21 @@ jobs:
uses: actions/cache@v4
with:
path: pg_install/v14
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v15 build
id: cache_pg_15
uses: actions/cache@v4
with:
path: pg_install/v15
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v16 build
id: cache_pg_16
uses: actions/cache@v4
with:
path: pg_install/v16
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'

4
Cargo.lock generated
View File

@@ -4037,6 +4037,7 @@ version = "0.2.4"
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
dependencies = [
"bytes",
"chrono",
"fallible-iterator",
"postgres-protocol",
]
@@ -7394,6 +7395,8 @@ dependencies = [
"num-traits",
"once_cell",
"parquet",
"postgres",
"postgres-types",
"prost",
"rand 0.8.5",
"regex",
@@ -7414,6 +7417,7 @@ dependencies = [
"time",
"time-macros",
"tokio",
"tokio-postgres",
"tokio-rustls 0.24.0",
"tokio-util",
"toml_datetime",

View File

@@ -69,6 +69,8 @@ RUN set -e \
&& apt install -y \
libreadline-dev \
libseccomp-dev \
libicu67 \
openssl \
ca-certificates \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
&& useradd -d /data neon \

View File

@@ -112,45 +112,6 @@ RUN for package in Capture::Tiny DateTime Devel::Cover Digest::MD5 File::Spec JS
&& make install \
&& rm -rf ../lcov.tar.gz
# Compile and install the static OpenSSL library
ENV OPENSSL_VERSION=3.2.2
ENV OPENSSL_PREFIX=/usr/local/openssl
RUN wget -O /tmp/openssl-${OPENSSL_VERSION}.tar.gz https://www.openssl.org/source/openssl-${OPENSSL_VERSION}.tar.gz && \
echo "197149c18d9e9f292c43f0400acaba12e5f52cacfe050f3d199277ea738ec2e7 /tmp/openssl-${OPENSSL_VERSION}.tar.gz" | sha256sum --check && \
cd /tmp && \
tar xzvf /tmp/openssl-${OPENSSL_VERSION}.tar.gz && \
rm /tmp/openssl-${OPENSSL_VERSION}.tar.gz && \
cd /tmp/openssl-${OPENSSL_VERSION} && \
./config --prefix=${OPENSSL_PREFIX} -static --static no-shared -fPIC && \
make -j "$(nproc)" && \
make install && \
cd /tmp && \
rm -rf /tmp/openssl-${OPENSSL_VERSION}
# Use the same version of libicu as the compute nodes so that
# clusters created using inidb on pageserver can be used by computes.
#
# TODO: at this time, Dockerfile.compute-node uses the debian bullseye libicu
# package, which is 67.1. We're duplicating that knowledge here, and also, technically,
# Debian has a few patches on top of 67.1 that we're not adding here.
ENV ICU_VERSION=67.1
ENV ICU_PREFIX=/usr/local/icu
# Download and build static ICU
RUN wget -O /tmp/libicu-${ICU_VERSION}.tgz https://github.com/unicode-org/icu/releases/download/release-${ICU_VERSION//./-}/icu4c-${ICU_VERSION//./_}-src.tgz && \
echo "94a80cd6f251a53bd2a997f6f1b5ac6653fe791dfab66e1eb0227740fb86d5dc /tmp/libicu-${ICU_VERSION}.tgz" | sha256sum --check && \
mkdir /tmp/icu && \
pushd /tmp/icu && \
tar -xzf /tmp/libicu-${ICU_VERSION}.tgz && \
pushd icu/source && \
./configure --prefix=${ICU_PREFIX} --enable-static --enable-shared=no CXXFLAGS="-fPIC" CFLAGS="-fPIC" && \
make -j "$(nproc)" && \
make install && \
popd && \
rm -rf icu && \
rm -f /tmp/libicu-${ICU_VERSION}.tgz && \
popd
# Switch to nonroot user
USER nonroot:nonroot
WORKDIR /home/nonroot
@@ -209,6 +170,3 @@ RUN whoami \
&& rustup --version --verbose \
&& rustc --version --verbose \
&& clang --version
# Set following flag to check in Makefile if its running in Docker
RUN touch /home/nonroot/.docker_build

View File

@@ -3,9 +3,6 @@ ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
# Where to install Postgres, default is ./pg_install, maybe useful for package managers
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/
OPENSSL_PREFIX_DIR := /usr/local/openssl
ICU_PREFIX_DIR := /usr/local/icu
#
# We differentiate between release / debug build types using the BUILD_TYPE
# environment variable.
@@ -23,16 +20,6 @@ else
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
endif
ifeq ($(shell test -e /home/nonroot/.docker_build && echo -n yes),yes)
# Exclude static build openssl, icu for local build (MacOS, Linux)
# Only keep for build type release and debug
PG_CFLAGS += -I$(OPENSSL_PREFIX_DIR)/include
PG_CONFIGURE_OPTS += --with-icu
PG_CONFIGURE_OPTS += ICU_CFLAGS='-I/$(ICU_PREFIX_DIR)/include -DU_STATIC_IMPLEMENTATION'
PG_CONFIGURE_OPTS += ICU_LIBS='-L$(ICU_PREFIX_DIR)/lib -L$(ICU_PREFIX_DIR)/lib64 -licui18n -licuuc -licudata -lstdc++ -Wl,-Bdynamic -lm'
PG_CONFIGURE_OPTS += LDFLAGS='-L$(OPENSSL_PREFIX_DIR)/lib -L$(OPENSSL_PREFIX_DIR)/lib64 -L$(ICU_PREFIX_DIR)/lib -L$(ICU_PREFIX_DIR)/lib64 -Wl,-Bstatic -lssl -lcrypto -Wl,-Bdynamic -lrt -lm -ldl -lpthread'
endif
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
# Seccomp BPF is only available for Linux
@@ -41,7 +28,7 @@ else ifeq ($(UNAME_S),Darwin)
ifndef DISABLE_HOMEBREW
# macOS with brew-installed openssl requires explicit paths
# It can be configured with OPENSSL_PREFIX variable
OPENSSL_PREFIX := $(shell brew --prefix openssl@3)
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
PG_CONFIGURE_OPTS += PKG_CONFIG_PATH=$(shell brew --prefix icu4c)/lib/pkgconfig
# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure

View File

@@ -17,7 +17,7 @@ nix.workspace = true
notify.workspace = true
num_cpus.workspace = true
opentelemetry.workspace = true
postgres.workspace = true
postgres = { workspace = true, features = ["with-chrono-0_4"] }
regex.workspace = true
serde.workspace = true
serde_json.workspace = true

View File

@@ -918,39 +918,38 @@ impl ComputeNode {
// temporarily reset max_cluster_size in config
// to avoid the possibility of hitting the limit, while we are reconfiguring:
// creating new extensions, roles, etc...
config::with_compute_ctl_tmp_override(pgdata_path, "neon.max_cluster_size=-1", || {
self.pg_reload_conf()?;
config::compute_ctl_temp_override_create(pgdata_path, "neon.max_cluster_size=-1")?;
self.pg_reload_conf()?;
let mut client = Client::connect(self.connstr.as_str(), NoTls)?;
let mut client = Client::connect(self.connstr.as_str(), NoTls)?;
// Proceed with post-startup configuration. Note, that order of operations is important.
// Disable DDL forwarding because control plane already knows about these roles/databases.
if spec.mode == ComputeMode::Primary {
client.simple_query("SET neon.forward_ddl = false")?;
cleanup_instance(&mut client)?;
handle_roles(&spec, &mut client)?;
handle_databases(&spec, &mut client)?;
handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
handle_grants(
&spec,
&mut client,
self.connstr.as_str(),
self.has_feature(ComputeFeature::AnonExtension),
)?;
handle_extensions(&spec, &mut client)?;
handle_extension_neon(&mut client)?;
// We can skip handle_migrations here because a new migration can only appear
// if we have a new version of the compute_ctl binary, which can only happen
// if compute got restarted, in which case we'll end up inside of apply_config
// instead of reconfigure.
}
// Proceed with post-startup configuration. Note, that order of operations is important.
// Disable DDL forwarding because control plane already knows about these roles/databases.
if spec.mode == ComputeMode::Primary {
client.simple_query("SET neon.forward_ddl = false")?;
cleanup_instance(&mut client)?;
handle_roles(&spec, &mut client)?;
handle_databases(&spec, &mut client)?;
handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
handle_grants(
&spec,
&mut client,
self.connstr.as_str(),
self.has_feature(ComputeFeature::AnonExtension),
)?;
handle_extensions(&spec, &mut client)?;
handle_extension_neon(&mut client)?;
// We can skip handle_migrations here because a new migration can only appear
// if we have a new version of the compute_ctl binary, which can only happen
// if compute got restarted, in which case we'll end up inside of apply_config
// instead of reconfigure.
}
// 'Close' connection
drop(client);
Ok(())
})?;
// 'Close' connection
drop(client);
// reset max_cluster_size in config back to original value and reload config
config::compute_ctl_temp_override_remove(pgdata_path)?;
self.pg_reload_conf()?;
let unknown_op = "unknown".to_string();
@@ -1041,17 +1040,12 @@ impl ComputeNode {
// temporarily reset max_cluster_size in config
// to avoid the possibility of hitting the limit, while we are applying config:
// creating new extensions, roles, etc...
config::with_compute_ctl_tmp_override(
pgdata_path,
"neon.max_cluster_size=-1",
|| {
self.pg_reload_conf()?;
config::compute_ctl_temp_override_create(pgdata_path, "neon.max_cluster_size=-1")?;
self.pg_reload_conf()?;
self.apply_config(&compute_state)?;
self.apply_config(&compute_state)?;
Ok(())
},
)?;
config::compute_ctl_temp_override_remove(pgdata_path)?;
self.pg_reload_conf()?;
}
self.post_apply_config()?;

View File

@@ -131,17 +131,18 @@ pub fn write_postgres_conf(
Ok(())
}
pub fn with_compute_ctl_tmp_override<F>(pgdata_path: &Path, options: &str, exec: F) -> Result<()>
where
F: FnOnce() -> Result<()>,
{
/// create file compute_ctl_temp_override.conf in pgdata_dir
/// add provided options to this file
pub fn compute_ctl_temp_override_create(pgdata_path: &Path, options: &str) -> Result<()> {
let path = pgdata_path.join("compute_ctl_temp_override.conf");
let mut file = File::create(path)?;
write!(file, "{}", options)?;
let res = exec();
file.set_len(0)?;
res
Ok(())
}
/// remove file compute_ctl_temp_override.conf in pgdata_dir
pub fn compute_ctl_temp_override_remove(pgdata_path: &Path) -> Result<()> {
let path = pgdata_path.join("compute_ctl_temp_override.conf");
std::fs::remove_file(path)?;
Ok(())
}

View File

@@ -17,7 +17,7 @@ use hyper::header::CONTENT_TYPE;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server, StatusCode};
use tokio::task;
use tracing::{debug, error, info, warn};
use tracing::{error, info, warn};
use tracing_utils::http::OtelName;
use utils::http::request::must_get_query_param;
@@ -48,7 +48,7 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
match (req.method(), req.uri().path()) {
// Serialized compute state.
(&Method::GET, "/status") => {
debug!("serving /status GET request");
info!("serving /status GET request");
let state = compute.state.lock().unwrap();
let status_response = status_response_from_state(&state);
Response::new(Body::from(serde_json::to_string(&status_response).unwrap()))

View File

@@ -165,6 +165,31 @@ fn watch_compute_activity(compute: &ComputeNode) {
continue;
}
}
//
// Don't suspend compute if there is activity in physical replication
//
let physical_replication_query =
"select last_msg_receipt_time from pg_stat_wal_receiver;";
match cli.query_opt(physical_replication_query, &[]) {
Ok(Some(row)) => {
match row.try_get::<&str, DateTime<Utc>>("last_msg_receipt_time") {
Ok(last_msg_receipt_time) => {
compute.update_last_active(Some(last_msg_receipt_time));
continue;
}
Err(e) => {
warn!("failed to parse `pg_stat_wal_receiver` `last_msg_receipt_time`: {:?}", e);
continue;
}
}
}
Ok(None) => { /* fall through */ }
Err(e) => {
warn!("Failed to query `pg_stat_wal_receiver`: {:?}", e);
continue;
}
}
//
// Do not suspend compute if autovacuum is running
//

View File

@@ -25,8 +25,6 @@ pub struct Config {
///
/// For simplicity, this value must be greater than or equal to `memory_history_len`.
memory_history_log_interval: usize,
/// The max number of iterations to skip before logging the next iteration
memory_history_log_noskip_interval: Duration,
}
impl Default for Config {
@@ -35,7 +33,6 @@ impl Default for Config {
memory_poll_interval: Duration::from_millis(100),
memory_history_len: 5, // use 500ms of history for decision-making
memory_history_log_interval: 20, // but only log every ~2s (otherwise it's spammy)
memory_history_log_noskip_interval: Duration::from_secs(15), // but only if it's changed, or 60 seconds have passed
}
}
}
@@ -88,12 +85,7 @@ impl CgroupWatcher {
// buffer for samples that will be logged. once full, it remains so.
let history_log_len = self.config.memory_history_log_interval;
let max_skip = self.config.memory_history_log_noskip_interval;
let mut history_log_buf = vec![MemoryStatus::zeroed(); history_log_len];
let mut last_logged_memusage = MemoryStatus::zeroed();
// Ensure that we're tracking a value that's definitely in the past, as Instant::now is only guaranteed to be non-decreasing on Rust's T1-supported systems.
let mut can_skip_logs_until = Instant::now() - max_skip;
for t in 0_u64.. {
ticker.tick().await;
@@ -123,24 +115,12 @@ impl CgroupWatcher {
// equal to the logging interval, we can just log the entire buffer every time we set
// the last entry, which also means that for this log line, we can ignore that it's a
// ring buffer (because all the entries are in order of increasing time).
//
// We skip logging the data if data hasn't meaningfully changed in a while, unless
// we've already ignored previous iterations for the last max_skip period.
if i == history_log_len - 1
&& (now > can_skip_logs_until
|| !history_log_buf
.iter()
.all(|usage| last_logged_memusage.status_is_close_or_similar(usage)))
{
if i == history_log_len - 1 {
info!(
history = ?MemoryStatus::debug_slice(&history_log_buf),
summary = ?summary,
"Recent cgroup memory statistics history"
);
can_skip_logs_until = now + max_skip;
last_logged_memusage = *history_log_buf.last().unwrap();
}
updates
@@ -252,24 +232,6 @@ impl MemoryStatus {
DS(slice)
}
/// Check if the other memory status is a close or similar result.
/// Returns true if the larger value is not larger than the smaller value
/// by 1/8 of the smaller value, and within 128MiB.
/// See tests::check_similarity_behaviour for examples of behaviour
fn status_is_close_or_similar(&self, other: &MemoryStatus) -> bool {
let margin;
let diff;
if self.non_reclaimable >= other.non_reclaimable {
margin = other.non_reclaimable / 8;
diff = self.non_reclaimable - other.non_reclaimable;
} else {
margin = self.non_reclaimable / 8;
diff = other.non_reclaimable - self.non_reclaimable;
}
diff < margin && diff < 128 * 1024 * 1024
}
}
#[cfg(test)]
@@ -299,65 +261,4 @@ mod tests {
assert_eq!(values(2, 4), [9, 0, 1, 2]);
assert_eq!(values(2, 10), [3, 4, 5, 6, 7, 8, 9, 0, 1, 2]);
}
#[test]
fn check_similarity_behaviour() {
// This all accesses private methods, so we can't actually run this
// as doctests, because doctests run as an external crate.
let mut small = super::MemoryStatus {
non_reclaimable: 1024,
};
let mut large = super::MemoryStatus {
non_reclaimable: 1024 * 1024 * 1024 * 1024,
};
// objects are self-similar, no matter the size
assert!(small.status_is_close_or_similar(&small));
assert!(large.status_is_close_or_similar(&large));
// inequality is symmetric
assert!(!small.status_is_close_or_similar(&large));
assert!(!large.status_is_close_or_similar(&small));
small.non_reclaimable = 64;
large.non_reclaimable = (small.non_reclaimable / 8) * 9;
// objects are self-similar, no matter the size
assert!(small.status_is_close_or_similar(&small));
assert!(large.status_is_close_or_similar(&large));
// values are similar if the larger value is larger by less than
// 12.5%, i.e. 1/8 of the smaller value.
// In the example above, large is exactly 12.5% larger, so this doesn't
// match.
assert!(!small.status_is_close_or_similar(&large));
assert!(!large.status_is_close_or_similar(&small));
large.non_reclaimable -= 1;
assert!(large.status_is_close_or_similar(&large));
assert!(small.status_is_close_or_similar(&large));
assert!(large.status_is_close_or_similar(&small));
// The 1/8 rule only applies up to 128MiB of difference
small.non_reclaimable = 1024 * 1024 * 1024 * 1024;
large.non_reclaimable = small.non_reclaimable / 8 * 9;
assert!(small.status_is_close_or_similar(&small));
assert!(large.status_is_close_or_similar(&large));
assert!(!small.status_is_close_or_similar(&large));
assert!(!large.status_is_close_or_similar(&small));
// the large value is put just above the threshold
large.non_reclaimable = small.non_reclaimable + 128 * 1024 * 1024;
assert!(large.status_is_close_or_similar(&large));
assert!(!small.status_is_close_or_similar(&large));
assert!(!large.status_is_close_or_similar(&small));
// now below
large.non_reclaimable -= 1;
assert!(large.status_is_close_or_similar(&large));
assert!(small.status_is_close_or_similar(&large));
assert!(large.status_is_close_or_similar(&small));
}
}

View File

@@ -12,11 +12,11 @@ use futures::{
stream::{SplitSink, SplitStream},
SinkExt, StreamExt,
};
use tracing::{debug, info};
use tracing::info;
use crate::protocol::{
OutboundMsg, OutboundMsgKind, ProtocolRange, ProtocolResponse, ProtocolVersion,
PROTOCOL_MAX_VERSION, PROTOCOL_MIN_VERSION,
OutboundMsg, ProtocolRange, ProtocolResponse, ProtocolVersion, PROTOCOL_MAX_VERSION,
PROTOCOL_MIN_VERSION,
};
/// The central handler for all communications in the monitor.
@@ -118,12 +118,7 @@ impl Dispatcher {
/// serialize the wrong thing and send it, since `self.sink.send` will take
/// any string.
pub async fn send(&mut self, message: OutboundMsg) -> anyhow::Result<()> {
if matches!(&message.inner, OutboundMsgKind::HealthCheck { .. }) {
debug!(?message, "sending message");
} else {
info!(?message, "sending message");
}
info!(?message, "sending message");
let json = serde_json::to_string(&message).context("failed to serialize message")?;
self.sink
.send(Message::Text(json))

View File

@@ -12,7 +12,7 @@ use axum::extract::ws::{Message, WebSocket};
use futures::StreamExt;
use tokio::sync::{broadcast, watch};
use tokio_util::sync::CancellationToken;
use tracing::{debug, error, info, warn};
use tracing::{error, info, warn};
use crate::cgroup::{self, CgroupWatcher};
use crate::dispatcher::Dispatcher;
@@ -474,29 +474,26 @@ impl Runner {
// there is a message from the agent
msg = self.dispatcher.source.next() => {
if let Some(msg) = msg {
match &msg {
// Don't use 'message' as a key as the string also uses
// that for its key
info!(?msg, "received message");
match msg {
Ok(msg) => {
let message: InboundMsg = match msg {
Message::Text(text) => {
serde_json::from_str(text).context("failed to deserialize text message")?
serde_json::from_str(&text).context("failed to deserialize text message")?
}
other => {
warn!(
// Don't use 'message' as a key as the
// string also uses that for its key
msg = ?other,
"problem processing incoming message: agent should only send text messages but received different type"
"agent should only send text messages but received different type"
);
continue
},
};
if matches!(&message.inner, InboundMsgKind::HealthCheck { .. }) {
debug!(?msg, "received message");
} else {
info!(?msg, "received message");
}
let out = match self.process_message(message.clone()).await {
Ok(Some(out)) => out,
Ok(None) => continue,
@@ -520,11 +517,7 @@ impl Runner {
.await
.context("failed to send message")?;
}
Err(e) => warn!(
error = format!("{e}"),
msg = ?msg,
"received error message"
),
Err(e) => warn!("{e}"),
}
} else {
anyhow::bail!("dispatcher connection closed")

View File

@@ -49,9 +49,7 @@ char *neon_auth_token;
int readahead_buffer_size = 128;
int flush_every_n_requests = 8;
int neon_protocol_version = 2;
static int last_protocol_version = -1; /* copy of n_p_v used to detect changes
* to the parameter at runtime */
int neon_protocol_version = 2;
static int max_reconnect_attempts = 60;
static int stripe_size;
@@ -232,14 +230,6 @@ AssignPageserverConnstring(const char *newval, void *extra)
memcpy(&pagestore_shared->shard_map, &shard_map, sizeof(ShardMap));
pg_write_barrier();
pg_atomic_add_fetch_u64(&pagestore_shared->end_update_counter, 1);
neon_log(LOG, "Updated shard map: %d active shards",
shard_map.num_shards);
for (int i = 0; i < shard_map.num_shards; i++)
{
neon_shard_log(i, LOG, "New connection string: \"%s\"",
shard_map.connstring[i]);
}
}
else
{
@@ -626,9 +616,6 @@ pageserver_connect(shardno_t shard_no, int elevel)
/* fallthrough */
}
case PS_Connected:
{
int ll;
/*
* We successfully connected. Future connections to this PageServer
* will do fast retries again, with exponential backoff.
@@ -636,18 +623,8 @@ pageserver_connect(shardno_t shard_no, int elevel)
shard->delay_us = MIN_RECONNECT_INTERVAL_USEC;
neon_shard_log(shard_no, DEBUG5, "Connection state: Connected");
if (last_protocol_version != neon_protocol_version)
{
ll = LOG;
last_protocol_version = neon_protocol_version;
}
else
{
ll = DEBUG1;
}
neon_shard_log(shard_no, ll, "libpagestore: connected to '%s' with protocol version %d", connstr, neon_protocol_version);
neon_shard_log(shard_no, LOG, "libpagestore: connected to '%s' with protocol version %d", connstr, neon_protocol_version);
return true;
}
default:
neon_shard_log(shard_no, ERROR, "libpagestore: invalid connection state %d", shard->state);
}
@@ -748,7 +725,7 @@ pageserver_send(shardno_t shard_no, NeonRequest *request)
/* If the connection was lost for some reason, reconnect */
if (shard->state == PS_Connected && PQstatus(shard->conn) == CONNECTION_BAD)
{
neon_shard_log(shard_no, LOG, "pageserver_send disconnect: bad connection");
neon_shard_log(shard_no, LOG, "pageserver_send disconnect bad connection");
pageserver_disconnect(shard_no);
pageserver_conn = NULL;
}
@@ -849,7 +826,7 @@ pageserver_receive(shardno_t shard_no)
}
PG_CATCH();
{
neon_shard_log(shard_no, LOG, "pageserver_receive disconnect: malformatted response");
neon_shard_log(shard_no, LOG, "pageserver_receive: disconnect due malformatted response");
pageserver_disconnect(shard_no);
PG_RE_THROW();
}
@@ -902,7 +879,7 @@ pageserver_flush(shardno_t shard_no)
char *msg = pchomp(PQerrorMessage(pageserver_conn));
pageserver_disconnect(shard_no);
neon_shard_log(shard_no, LOG, "pageserver_flush disconnect: failed to flush page requests: %s", msg);
neon_shard_log(shard_no, LOG, "pageserver_flush disconnect because failed to flush page requests: %s", msg);
pfree(msg);
return false;
}
@@ -1078,14 +1055,6 @@ pg_init_libpagestore(void)
0, /* no flags required */
NULL, NULL, NULL);
/*
* Store last seen protocol version as initialization, so we can log
* differences once we see them appear.
*/
last_protocol_version = neon_protocol_version;
neon_log(LOG, "Initializing libpagestore with protocol version %d",
neon_protocol_version);
relsize_hash_init();
if (page_server != NULL)
@@ -1100,7 +1069,7 @@ pg_init_libpagestore(void)
*/
neon_auth_token = getenv("NEON_AUTH_TOKEN");
if (neon_auth_token)
neon_log(DEBUG1, "using storage auth token from NEON_AUTH_TOKEN environment variable");
neon_log(LOG, "using storage auth token from NEON_AUTH_TOKEN environment variable");
if (page_server_connstring && page_server_connstring[0])
{

View File

@@ -324,15 +324,14 @@ files:
help: 'Whether or not the replication slot wal_status is lost'
key_labels:
- slot_name
values: [wal_is_lost]
values: [wal_status_is_lost]
query: |
SELECT slot_name,
CASE
WHEN wal_status = 'lost' THEN 1
ELSE 0
END AS wal_is_lost
END AS wal_status_is_lost
FROM pg_replication_slots;
- filename: neon_collector_autoscaling.yml
content: |
collector_name: neon_collector_autoscaling

View File

@@ -53,6 +53,8 @@ num-integer = { version = "0.1", features = ["i128"] }
num-traits = { version = "0.2", features = ["i128", "libm"] }
once_cell = { version = "1" }
parquet = { git = "https://github.com/apache/arrow-rs", branch = "master", default-features = false, features = ["zstd"] }
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon", default-features = false, features = ["with-chrono-0_4"] }
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon", default-features = false, features = ["with-chrono-0_4"] }
prost = { version = "0.11" }
rand = { version = "0.8", features = ["small_rng"] }
regex = { version = "1" }
@@ -70,6 +72,7 @@ subtle = { version = "2" }
sync_wrapper = { version = "0.1", default-features = false, features = ["futures"] }
time = { version = "0.3", features = ["macros", "serde-well-known"] }
tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "net", "process", "rt-multi-thread", "signal", "test-util"] }
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon", features = ["with-chrono-0_4"] }
tokio-rustls = { version = "0.24" }
tokio-util = { version = "0.7", features = ["codec", "compat", "io", "rt"] }
toml_datetime = { version = "0.6", default-features = false, features = ["serde"] }