Compare commits

...

5 Commits

Author SHA1 Message Date
John Spray
62aa8f2ba2 hacky way to make tests pass 2024-10-04 09:56:56 +00:00
John Spray
efc3f1cfe5 DNM: test-log 2024-10-03 11:29:06 +00:00
John Spray
b66bf890fa storage controller: fix a disagreement between schedule + optimize
Closes: https://github.com/neondatabase/neon/issues/8969
2024-10-03 11:14:34 +00:00
Joonas Koivunen
dbef1b064c chore: smaller layer changes (#9247)
Address minor technical debt in Layer inspired by #9224:

- layer usage as arg same as in spans
- avoid one Weak::upgrade
2024-10-03 09:38:45 +01:00
Heikki Linnakangas
6a9e2d657c Remove unnecessary dependencies from postgis-build image (#9211)
The apt install stage before this commit:

    0 upgraded, 391 newly installed, 0 to remove and 9 not upgraded.
    Need to get 261 MB of archives.

after:

    0 upgraded, 367 newly installed, 0 to remove and 9 not upgraded.
    Need to get 220 MB of archives.
2024-10-03 10:05:23 +03:00
7 changed files with 328 additions and 39 deletions

102
Cargo.lock generated
View File

@@ -82,12 +82,27 @@ dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"anstyle-wincon 1.0.1",
"colorchoice",
"is-terminal",
"utf8parse",
]
[[package]]
name = "anstream"
version = "0.6.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon 3.0.4",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.8"
@@ -122,6 +137,16 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8"
dependencies = [
"anstyle",
"windows-sys 0.52.0",
]
[[package]]
name = "anyhow"
version = "1.0.71"
@@ -1185,7 +1210,7 @@ version = "4.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f423e341edefb78c9caba2d9c7f7687d0e72e89df3ce3394554754393ac3990"
dependencies = [
"anstream",
"anstream 0.3.2",
"anstyle",
"bitflags 1.3.2",
"clap_lex",
@@ -1924,6 +1949,15 @@ dependencies = [
"syn 2.0.52",
]
[[package]]
name = "env_filter"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f2c92ceda6ceec50f43169f9ee8424fe2db276791afde7b2cd8bc084cb376ab"
dependencies = [
"log",
]
[[package]]
name = "env_logger"
version = "0.10.2"
@@ -1937,6 +1971,18 @@ dependencies = [
"termcolor",
]
[[package]]
name = "env_logger"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c012a26a7f605efc424dd53697843a72be7dc86ad2d01f7814337794a12231d"
dependencies = [
"anstream 0.6.15",
"anstyle",
"env_filter",
"log",
]
[[package]]
name = "equivalent"
version = "1.0.1"
@@ -2811,6 +2857,12 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "itertools"
version = "0.10.5"
@@ -3254,6 +3306,16 @@ dependencies = [
"winapi",
]
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
dependencies = [
"overload",
"winapi",
]
[[package]]
name = "num"
version = "0.4.1"
@@ -3537,6 +3599,12 @@ version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a"
[[package]]
name = "overload"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "p256"
version = "0.11.1"
@@ -4119,7 +4187,7 @@ dependencies = [
"bindgen",
"bytes",
"crc32c",
"env_logger",
"env_logger 0.10.2",
"log",
"memoffset 0.9.0",
"once_cell",
@@ -4312,7 +4380,7 @@ dependencies = [
"consumption_metrics",
"dashmap",
"ecdsa 0.16.9",
"env_logger",
"env_logger 0.10.2",
"fallible-iterator",
"framed-websockets",
"futures",
@@ -5787,6 +5855,7 @@ dependencies = [
"serde_json",
"strum",
"strum_macros",
"test-log",
"thiserror",
"tokio",
"tokio-util",
@@ -6039,6 +6108,28 @@ dependencies = [
"syn 2.0.52",
]
[[package]]
name = "test-log"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3dffced63c2b5c7be278154d76b479f9f9920ed34e7574201407f0b14e2bbb93"
dependencies = [
"env_logger 0.11.2",
"test-log-macros",
"tracing-subscriber",
]
[[package]]
name = "test-log-macros"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5999e24eaa32083191ba4e425deb75cdf25efefabe5aaccb7446dd0d4122a3f5"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.52",
]
[[package]]
name = "thiserror"
version = "1.0.57"
@@ -6570,6 +6661,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b"
dependencies = [
"matchers",
"nu-ansi-term",
"once_cell",
"regex",
"serde",
@@ -6874,7 +6966,7 @@ dependencies = [
"anyhow",
"camino-tempfile",
"clap",
"env_logger",
"env_logger 0.10.2",
"log",
"postgres",
"postgres_ffi",

View File

@@ -27,8 +27,8 @@ RUN case $DEBIAN_FLAVOR in \
;; \
esac && \
apt update && \
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libssl-dev \
apt install --no-install-recommends -y git autoconf automake libtool build-essential bison flex libreadline-dev \
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget ca-certificates pkg-config libssl-dev \
libicu-dev libxslt1-dev liblz4-dev libzstd-dev zstd \
$VERSION_INSTALLS
@@ -104,7 +104,7 @@ FROM build-deps AS postgis-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \
apt install --no-install-recommends -y gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \
libboost-system-dev libboost-iostreams-dev libboost-program-options-dev libboost-timer-dev \
libcgal-dev libgdal-dev libgmp-dev libmpfr-dev libopenscenegraph-dev libprotobuf-c-dev \
protobuf-c-compiler xsltproc
@@ -182,7 +182,7 @@ RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
apt update && \
apt install -y ninja-build python3-dev libncurses5 binutils clang
apt install --no-install-recommends -y ninja-build python3-dev libncurses5 binutils clang
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
@@ -587,7 +587,7 @@ RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
apt-get update && \
apt-get install -y \
apt-get install --no-install-recommends -y \
libboost-iostreams1.74-dev \
libboost-regex1.74-dev \
libboost-serialization1.74-dev \
@@ -752,7 +752,7 @@ ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt-get update && \
apt-get install -y curl libclang-dev && \
apt-get install --no-install-recommends -y curl libclang-dev && \
useradd -ms /bin/bash nonroot -b /home
ENV HOME=/home/nonroot
@@ -1058,9 +1058,12 @@ FROM debian:$DEBIAN_FLAVOR AS pgbouncer
ARG DEBIAN_FLAVOR
RUN set -e \
&& apt-get update \
&& apt-get install -y \
&& apt-get install --no-install-recommends -y \
build-essential \
git \
ca-certificates \
autoconf \
automake \
libevent-dev \
libtool \
pkg-config

View File

@@ -458,8 +458,8 @@ impl Layer {
// This case is legal in brief time windows: for example an in-flight getpage request can hold on to a layer object
// which was covered by a concurrent compaction.
tracing::info!(
"Layer {} became visible as a result of access",
self.0.desc.layer_name()
layer=%self,
"became visible as a result of access",
);
}
}
@@ -688,7 +688,9 @@ impl Drop for LayerInner {
// and we could be delaying shutdown for nothing.
}
if let Some(timeline) = self.timeline.upgrade() {
let timeline = self.timeline.upgrade();
if let Some(timeline) = timeline.as_ref() {
// Only need to decrement metrics if the timeline still exists: otherwise
// it will have already de-registered these metrics via TimelineMetrics::shutdown
if self.desc.is_delta() {
@@ -719,7 +721,6 @@ impl Drop for LayerInner {
let path = std::mem::take(&mut self.path);
let file_name = self.layer_desc().layer_name();
let file_size = self.layer_desc().file_size;
let timeline = self.timeline.clone();
let meta = self.metadata();
let status = self.status.take();
@@ -729,7 +730,7 @@ impl Drop for LayerInner {
// carry this until we are finished for [`Layer::wait_drop`] support
let _status = status;
let Some(timeline) = timeline.upgrade() else {
let Some(timeline) = timeline else {
// no need to nag that timeline is gone: under normal situation on
// task_mgr::remove_tenant_from_memory the timeline is gone before we get dropped.
LAYER_IMPL_METRICS.inc_deletes_failed(DeleteFailed::TimelineGone);

View File

@@ -56,3 +56,6 @@ utils = { path = "../libs/utils/" }
metrics = { path = "../libs/metrics/" }
control_plane = { path = "../control_plane" }
workspace_hack = { version = "0.1", path = "../workspace_hack" }
[dev-dependencies]
test-log = "*"

View File

@@ -206,6 +206,10 @@ pub(crate) struct NodeSecondarySchedulingScore {
/// The number of shards belonging to the tenant currently being
/// scheduled that are attached to this node.
affinity_score: AffinityScore,
/// Size of [`ScheduleContext::attached_nodes`] for the current node.
/// This normally tracks the number of attached shards belonging to the
/// tenant being scheduled that are already on this node.
secondary_shards_in_context: usize,
/// Utilisation score that combines shard count and disk utilisation
utilization_score: u64,
/// Total number of shards attached to this node. When nodes have identical utilisation, this
@@ -231,6 +235,7 @@ impl NodeSchedulingScore for NodeSecondarySchedulingScore {
Some(Self {
az_match: SecondaryAzMatch(AzMatch::new(&node.az, preferred_az.as_ref())),
secondary_shards_in_context: context.secondary_nodes.get(node_id).copied().unwrap_or(0),
affinity_score: context
.nodes
.get(node_id)
@@ -327,6 +332,9 @@ pub(crate) struct ScheduleContext {
/// Specifically how many _attached_ locations are on each node
pub(crate) attached_nodes: HashMap<NodeId, usize>,
/// Specifically how many _secondary_ locations are on each node
pub(crate) secondary_nodes: HashMap<NodeId, usize>,
pub(crate) mode: ScheduleMode,
}
@@ -345,6 +353,11 @@ impl ScheduleContext {
*entry += 1;
}
pub(crate) fn push_secondary(&mut self, node_id: NodeId) {
let entry = self.secondary_nodes.entry(node_id).or_default();
*entry += 1;
}
pub(crate) fn get_node_affinity(&self, node_id: NodeId) -> AffinityScore {
self.nodes
.get(&node_id)
@@ -786,7 +799,14 @@ pub(crate) mod test_utils {
#[cfg(test)]
mod tests {
use pageserver_api::{controller_api::NodeAvailability, models::utilization::test_utilization};
use pageserver_api::{
controller_api::NodeAvailability, models::utilization::test_utilization,
shard::ShardIdentity, shard::TenantShardId,
};
use utils::{
id::TenantId,
shard::{ShardCount, ShardNumber},
};
use super::*;
@@ -1074,4 +1094,171 @@ mod tests {
intent.clear(&mut scheduler);
}
}
#[test]
fn repro_foo() {
let az_tag = AvailabilityZone("az-a".to_string());
let nodes = test_utils::make_test_nodes(
5,
&[
az_tag.clone(),
az_tag.clone(),
az_tag.clone(),
az_tag.clone(),
az_tag.clone(),
],
);
let mut scheduler = Scheduler::new(nodes.values());
// Need to keep these alive because they contribute to shard counts via RAII
let mut scheduled_shards = Vec::new();
let mut context = ScheduleContext::default();
fn schedule_shard(
tenant_shard_id: TenantShardId,
expect_attached: NodeId,
expect_secondary: NodeId,
scheduled_shards: &mut Vec<TenantShard>,
scheduler: &mut Scheduler,
context: &mut ScheduleContext,
) {
let shard_identity = ShardIdentity::new(
tenant_shard_id.shard_number,
tenant_shard_id.shard_count,
pageserver_api::shard::ShardStripeSize(1),
)
.unwrap();
let mut shard = TenantShard::new(
tenant_shard_id,
shard_identity,
pageserver_api::controller_api::PlacementPolicy::Attached(1),
);
shard.schedule(scheduler, context).unwrap();
assert_eq!(shard.intent.get_attached().unwrap(), expect_attached);
assert_eq!(
shard.intent.get_secondary().first().unwrap(),
&expect_secondary
);
scheduled_shards.push(shard);
}
let tenant_id = TenantId::generate();
schedule_shard(
TenantShardId {
tenant_id,
shard_number: ShardNumber(0),
shard_count: ShardCount(8),
},
NodeId(1),
NodeId(2),
&mut scheduled_shards,
&mut scheduler,
&mut context,
);
schedule_shard(
TenantShardId {
tenant_id,
shard_number: ShardNumber(1),
shard_count: ShardCount(8),
},
NodeId(3),
NodeId(4),
&mut scheduled_shards,
&mut scheduler,
&mut context,
);
schedule_shard(
TenantShardId {
tenant_id,
shard_number: ShardNumber(2),
shard_count: ShardCount(8),
},
NodeId(5),
NodeId(1),
&mut scheduled_shards,
&mut scheduler,
&mut context,
);
schedule_shard(
TenantShardId {
tenant_id,
shard_number: ShardNumber(3),
shard_count: ShardCount(8),
},
NodeId(2),
NodeId(3),
&mut scheduled_shards,
&mut scheduler,
&mut context,
);
schedule_shard(
TenantShardId {
tenant_id,
shard_number: ShardNumber(4),
shard_count: ShardCount(8),
},
NodeId(4),
NodeId(5),
&mut scheduled_shards,
&mut scheduler,
&mut context,
);
schedule_shard(
TenantShardId {
tenant_id,
shard_number: ShardNumber(5),
shard_count: ShardCount(8),
},
NodeId(1),
NodeId(2),
&mut scheduled_shards,
&mut scheduler,
&mut context,
);
schedule_shard(
TenantShardId {
tenant_id,
shard_number: ShardNumber(6),
shard_count: ShardCount(8),
},
NodeId(3),
NodeId(4),
&mut scheduled_shards,
&mut scheduler,
&mut context,
);
schedule_shard(
TenantShardId {
tenant_id,
shard_number: ShardNumber(7),
shard_count: ShardCount(8),
},
NodeId(5),
NodeId(1),
&mut scheduled_shards,
&mut scheduler,
&mut context,
);
for shard in &scheduled_shards {
assert_eq!(shard.optimize_attachment(&nodes, &context), None);
}
for mut shard in scheduled_shards {
shard.intent.clear(&mut scheduler);
}
}
}

View File

@@ -5858,10 +5858,7 @@ impl Service {
// Accumulate the schedule context for all the shards in a tenant: we must have
// the total view of all shards before we can try to optimize any of them.
schedule_context.avoid(&shard.intent.all_pageservers());
if let Some(attached) = shard.intent.get_attached() {
schedule_context.push_attached(*attached);
}
shard.populate_context(&mut schedule_context);
tenant_shards.push(shard);
// Once we have seen the last shard in the tenant, proceed to search across all shards

View File

@@ -566,10 +566,7 @@ impl TenantShard {
) -> Result<(), ScheduleError> {
let r = self.do_schedule(scheduler, context);
context.avoid(&self.intent.all_pageservers());
if let Some(attached) = self.intent.get_attached() {
context.push_attached(*attached);
}
self.populate_context(context);
r
}
@@ -676,6 +673,19 @@ impl TenantShard {
Ok(())
}
/// When building the ScheduleContext of a tenant, call this on each shard to
/// add its contribution to the context.
pub(crate) fn populate_context(&self, context: &mut ScheduleContext) {
context.avoid(&self.intent.all_pageservers());
if let Some(attached) = self.intent.get_attached() {
context.push_attached(*attached);
}
for secondary in self.intent.get_secondary() {
context.push_secondary(*secondary);
}
}
/// Reschedule this tenant shard to one of its secondary locations. Returns a scheduling error
/// if the swap is not possible and leaves the intent state in its original state.
///
@@ -823,10 +833,13 @@ impl TenantShard {
continue;
};
// TODO: make this AZ aware: secondary should be chosen "As if I am an attachment, but
// in a different AZ to my actual preferred AZ"
// Let the scheduler suggest a node, where it would put us if we were scheduling afresh
// This implicitly limits the choice to nodes that are available, and prefers nodes
// with lower utilization.
let Ok(candidate_node) = scheduler.schedule_shard::<SecondaryShardTag>(
let Ok(candidate_node) = scheduler.schedule_shard::<AttachedShardTag>(
&self.intent.all_pageservers(),
&self.preferred_az_id,
schedule_context,
@@ -1632,10 +1645,8 @@ pub(crate) mod tests {
shard_b.intent.push_secondary(&mut scheduler, NodeId(3));
let mut schedule_context = ScheduleContext::default();
schedule_context.avoid(&shard_a.intent.all_pageservers());
schedule_context.push_attached(shard_a.intent.get_attached().unwrap());
schedule_context.avoid(&shard_b.intent.all_pageservers());
schedule_context.push_attached(shard_b.intent.get_attached().unwrap());
shard_a.populate_context(&mut schedule_context);
shard_b.populate_context(&mut schedule_context);
let optimization_a = shard_a.optimize_attachment(&nodes, &schedule_context);
@@ -1699,10 +1710,8 @@ pub(crate) mod tests {
shard_b.intent.push_secondary(&mut scheduler, NodeId(3));
let mut schedule_context = ScheduleContext::default();
schedule_context.avoid(&shard_a.intent.all_pageservers());
schedule_context.push_attached(shard_a.intent.get_attached().unwrap());
schedule_context.avoid(&shard_b.intent.all_pageservers());
schedule_context.push_attached(shard_b.intent.get_attached().unwrap());
shard_a.populate_context(&mut schedule_context);
shard_b.populate_context(&mut schedule_context);
let optimization_a = shard_a.optimize_secondary(&mut scheduler, &schedule_context);
@@ -1744,10 +1753,7 @@ pub(crate) mod tests {
let mut any_changed = false;
for shard in shards.iter() {
schedule_context.avoid(&shard.intent.all_pageservers());
if let Some(attached) = shard.intent.get_attached() {
schedule_context.push_attached(*attached);
}
shard.populate_context(&mut schedule_context);
}
for shard in shards.iter_mut() {