pageserver: fixes for layer path changes (#7786)
## Problem

- When a layer with the legacy local path format was evicted and then re-downloaded, the pageserver panicked because the path produced by remote storage didn't match the path stored in `Layer`.
- While investigating, I also realized that secondary locations would have a similar issue with evictions.

Closes: #7783

## Summary of changes

- Make the remote timeline client take local paths as an input: it should not have its own ideas about local paths, and instead just uses the layer path that the `Layer` holds.
- Make secondary state store an explicit local path, populated on scan of local disk at startup. This provides the same behavior as for `Layer`: our `local_layer_path` is a _default_, but the layer path can actually be anything (e.g. an old-style one).
- Add tests for both cases.
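The shape of the change, as a minimal sketch (the `LayerHandle` type below is hypothetical and stands in for the pageserver's `Layer`/`OnDiskState`; it is not the actual implementation): the local path is recorded once when the layer is first created or discovered on disk (defaulting to the new-style path, but possibly a legacy one), and every later eviction or re-download reuses that stored path instead of recomputing it.

```rust
// Sketch only: illustrates "caller owns the local path" under assumed names.
use camino::{Utf8Path, Utf8PathBuf};

/// Hypothetical stand-in for a tracked layer: the path is stored once and
/// reused verbatim, so a legacy-format path keeps working after eviction.
struct LayerHandle {
    local_path: Utf8PathBuf,
}

impl LayerHandle {
    /// Eviction uses the stored path; it never recomputes a default path.
    fn evict(&self) -> std::io::Result<()> {
        match std::fs::remove_file(&self.local_path) {
            // Tolerate ENOENT: the file may already be gone (e.g. removed
            // concurrently); any other I/O error is propagated.
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
            other => other,
        }
    }

    /// A re-download must land at the same path the layer was tracked under,
    /// which is why the download path is now passed in rather than derived.
    fn download_destination(&self) -> &Utf8Path {
        &self.local_path
    }
}
```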
@@ -535,17 +535,11 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
             }
             EvictionLayer::Secondary(layer) => {
                 let file_size = layer.metadata.file_size();
-                let tenant_manager = tenant_manager.clone();
 
                 js.spawn(async move {
                     layer
                         .secondary_tenant
-                        .evict_layer(
-                            tenant_manager.get_conf(),
-                            layer.timeline_id,
-                            layer.name,
-                            layer.metadata,
-                        )
+                        .evict_layer(layer.timeline_id, layer.name)
                         .await;
                     Ok(file_size)
                 });
@@ -518,6 +518,7 @@ impl RemoteTimelineClient {
         &self,
         layer_file_name: &LayerName,
         layer_metadata: &LayerFileMetadata,
+        local_path: &Utf8Path,
         cancel: &CancellationToken,
         ctx: &RequestContext,
     ) -> anyhow::Result<u64> {
@@ -536,6 +537,7 @@ impl RemoteTimelineClient {
             self.timeline_id,
             layer_file_name,
             layer_metadata,
+            local_path,
             cancel,
             ctx,
         )
@@ -21,7 +21,6 @@ use crate::config::PageServerConf;
 use crate::context::RequestContext;
 use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
 use crate::tenant::remote_timeline_client::{remote_layer_path, remote_timelines_path};
-use crate::tenant::storage_layer::layer::local_layer_path;
 use crate::tenant::storage_layer::LayerName;
 use crate::tenant::Generation;
 use crate::virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile};
@@ -50,19 +49,13 @@ pub async fn download_layer_file<'a>(
     timeline_id: TimelineId,
     layer_file_name: &'a LayerName,
     layer_metadata: &'a LayerFileMetadata,
+    local_path: &Utf8Path,
     cancel: &CancellationToken,
     ctx: &RequestContext,
 ) -> Result<u64, DownloadError> {
     debug_assert_current_span_has_tenant_and_timeline_id();
 
     let timeline_path = conf.timeline_path(&tenant_shard_id, &timeline_id);
-    let local_path = local_layer_path(
-        conf,
-        &tenant_shard_id,
-        &timeline_id,
-        layer_file_name,
-        &layer_metadata.generation,
-    );
 
     let remote_path = remote_layer_path(
         &tenant_shard_id.tenant_id,
@@ -82,7 +75,7 @@ pub async fn download_layer_file<'a>(
     // For more context about durable_rename check this email from postgres mailing list:
     // https://www.postgresql.org/message-id/56583BDD.9060302@2ndquadrant.com
     // If pageserver crashes the temp file will be deleted on startup and re-downloaded.
-    let temp_file_path = path_with_suffix_extension(&local_path, TEMP_DOWNLOAD_EXTENSION);
+    let temp_file_path = path_with_suffix_extension(local_path, TEMP_DOWNLOAD_EXTENSION);
 
     let bytes_amount = download_retry(
         || async { download_object(storage, &remote_path, &temp_file_path, cancel, ctx).await },
@@ -6,11 +6,9 @@ mod scheduler;
 use std::{sync::Arc, time::SystemTime};
 
 use crate::{
-    config::PageServerConf,
     context::RequestContext,
     disk_usage_eviction_task::DiskUsageEvictionInfo,
     task_mgr::{self, TaskKind, BACKGROUND_RUNTIME},
-    virtual_file::MaybeFatalIo,
 };
 
 use self::{
@@ -21,9 +19,8 @@ use self::{
 use super::{
     config::{SecondaryLocationConfig, TenantConfOpt},
     mgr::TenantManager,
-    remote_timeline_client::LayerFileMetadata,
     span::debug_assert_current_span_has_tenant_id,
-    storage_layer::{layer::local_layer_path, LayerName},
+    storage_layer::LayerName,
 };
 
 use pageserver_api::{
@@ -178,13 +175,7 @@ impl SecondaryTenant {
 
     /// Cancellation safe, but on cancellation the eviction will go through
     #[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%timeline_id, name=%name))]
-    pub(crate) async fn evict_layer(
-        self: &Arc<Self>,
-        conf: &PageServerConf,
-        timeline_id: TimelineId,
-        name: LayerName,
-        metadata: LayerFileMetadata,
-    ) {
+    pub(crate) async fn evict_layer(self: &Arc<Self>, timeline_id: TimelineId, name: LayerName) {
         debug_assert_current_span_has_tenant_id();
 
         let guard = match self.gate.enter() {
@@ -197,41 +188,11 @@ impl SecondaryTenant {
 
         let now = SystemTime::now();
 
-        let local_path = local_layer_path(
-            conf,
-            &self.tenant_shard_id,
-            &timeline_id,
-            &name,
-            &metadata.generation,
-        );
-
         let this = self.clone();
 
         // spawn it to be cancellation safe
         tokio::task::spawn_blocking(move || {
             let _guard = guard;
-            // We tolerate ENOENT, because between planning eviction and executing
-            // it, the secondary downloader could have seen an updated heatmap that
-            // resulted in a layer being deleted.
-            // Other local I/O errors are process-fatal: these should never happen.
-            let deleted = std::fs::remove_file(local_path);
-
-            let not_found = deleted
-                .as_ref()
-                .is_err_and(|x| x.kind() == std::io::ErrorKind::NotFound);
-
-            let deleted = if not_found {
-                false
-            } else {
-                deleted
-                    .map(|()| true)
-                    .fatal_err("Deleting layer during eviction")
-            };
-
-            if !deleted {
-                // skip updating accounting and putting perhaps later timestamp
-                return;
-            }
-
             // Update the timeline's state. This does not have to be synchronized with
             // the download process, because:
@@ -250,8 +211,15 @@ impl SecondaryTenant {
             // of the cache.
             let mut detail = this.detail.lock().unwrap();
             if let Some(timeline_detail) = detail.timelines.get_mut(&timeline_id) {
-                timeline_detail.on_disk_layers.remove(&name);
-                timeline_detail.evicted_at.insert(name, now);
+                let removed = timeline_detail.on_disk_layers.remove(&name);
+
+                // We might race with removal of the same layer during downloads, if it was removed
+                // from the heatmap. If we see that the OnDiskState is gone, then no need to
+                // do a physical deletion or store in evicted_at.
+                if let Some(removed) = removed {
+                    removed.remove_blocking();
+                    timeline_detail.evicted_at.insert(name, now);
+                }
             }
         })
         .await
@@ -111,6 +111,7 @@ struct SecondaryDownloader {
 pub(super) struct OnDiskState {
     metadata: LayerFileMetadata,
     access_time: SystemTime,
+    local_path: Utf8PathBuf,
 }
 
 impl OnDiskState {
@@ -121,12 +122,26 @@ impl OnDiskState {
         _ame: LayerName,
         metadata: LayerFileMetadata,
         access_time: SystemTime,
+        local_path: Utf8PathBuf,
     ) -> Self {
         Self {
             metadata,
            access_time,
+            local_path,
         }
     }
 
+    // This is infallible, because all errors are either acceptable (ENOENT), or totally
+    // unexpected (fatal).
+    pub(super) fn remove_blocking(&self) {
+        // We tolerate ENOENT, because between planning eviction and executing
+        // it, the secondary downloader could have seen an updated heatmap that
+        // resulted in a layer being deleted.
+        // Other local I/O errors are process-fatal: these should never happen.
+        std::fs::remove_file(&self.local_path)
+            .or_else(fs_ext::ignore_not_found)
+            .fatal_err("Deleting secondary layer")
+    }
 }
 
 #[derive(Debug, Clone, Default)]
@@ -816,20 +831,12 @@ impl<'a> TenantDownloader<'a> {
             if cfg!(debug_assertions) {
                 // Debug for https://github.com/neondatabase/neon/issues/6966: check that the files we think
                 // are already present on disk are really there.
-                let local_path = local_layer_path(
-                    self.conf,
-                    tenant_shard_id,
-                    &timeline.timeline_id,
-                    &layer.name,
-                    &layer.metadata.generation,
-                );
-
-                match tokio::fs::metadata(&local_path).await {
+                match tokio::fs::metadata(&on_disk.local_path).await {
                     Ok(meta) => {
                         tracing::debug!(
                             "Layer {} present at {}, size {}",
                             layer.name,
-                            local_path,
+                            on_disk.local_path,
                             meta.len(),
                         );
                     }
@@ -837,7 +844,7 @@ impl<'a> TenantDownloader<'a> {
                         tracing::warn!(
                             "Layer {} not found at {} ({})",
                             layer.name,
-                            local_path,
+                            on_disk.local_path,
                             e
                         );
                         debug_assert!(false);
@@ -926,6 +933,13 @@ impl<'a> TenantDownloader<'a> {
                     v.get_mut().access_time = t.access_time;
                 }
                 Entry::Vacant(e) => {
+                    let local_path = local_layer_path(
+                        self.conf,
+                        tenant_shard_id,
+                        &timeline.timeline_id,
+                        &t.name,
+                        &t.metadata.generation,
+                    );
                     e.insert(OnDiskState::new(
                         self.conf,
                         tenant_shard_id,
@@ -933,6 +947,7 @@ impl<'a> TenantDownloader<'a> {
                         t.name,
                         LayerFileMetadata::from(&t.metadata),
                         t.access_time,
+                        local_path,
                     ));
                 }
             }
@@ -955,6 +970,14 @@ impl<'a> TenantDownloader<'a> {
             &self.secondary_state.cancel
         );
 
+        let local_path = local_layer_path(
+            self.conf,
+            tenant_shard_id,
+            timeline_id,
+            &layer.name,
+            &layer.metadata.generation,
+        );
+
         // Note: no backoff::retry wrapper here because download_layer_file does its own retries internally
         let downloaded_bytes = match download_layer_file(
             self.conf,
@@ -963,6 +986,7 @@ impl<'a> TenantDownloader<'a> {
             *timeline_id,
             &layer.name,
             &LayerFileMetadata::from(&layer.metadata),
+            &local_path,
             &self.secondary_state.cancel,
             ctx,
         )
@@ -1116,6 +1140,7 @@ async fn init_timeline_state(
                     name,
                     LayerFileMetadata::from(&remote_meta.metadata),
                     remote_meta.access_time,
+                    file_path,
                 ),
             );
         }
@@ -1108,6 +1108,7 @@ impl LayerInner {
             .download_layer_file(
                 &self.desc.layer_name(),
                 &self.metadata(),
+                &self.path,
                 &timeline.cancel,
                 ctx,
             )
@@ -25,6 +25,7 @@ from fixtures.neon_fixtures import (
     S3Scrubber,
     generate_uploads_and_deletions,
 )
+from fixtures.pageserver.common_types import parse_layer_file_name
 from fixtures.pageserver.http import PageserverApiException
 from fixtures.pageserver.utils import (
     assert_tenant_state,
@@ -632,39 +633,86 @@ def test_upgrade_generationless_local_file_paths(
     generation numbers: it should accept these layer files, and avoid doing
     a delete/download cycle on them.
     """
-    env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
-    tenant_id = env.initial_tenant
-    timeline_id = env.initial_timeline
+    neon_env_builder.num_pageservers = 2
+    env = neon_env_builder.init_configs()
+    env.start()
+
+    tenant_id = TenantId.generate()
+    timeline_id = TimelineId.generate()
+    env.neon_cli.create_tenant(
+        tenant_id, timeline_id, conf=TENANT_CONF, placement_policy='{"Attached":1}'
+    )
 
     workload = Workload(env, tenant_id, timeline_id)
     workload.init()
     workload.write_rows(1000)
 
-    env.pageserver.stop()
+    attached_pageserver = env.get_tenant_pageserver(tenant_id)
+    secondary_pageserver = list([ps for ps in env.pageservers if ps.id != attached_pageserver.id])[
+        0
+    ]
+
+    attached_pageserver.http_client().tenant_heatmap_upload(tenant_id)
+    secondary_pageserver.http_client().tenant_secondary_download(tenant_id)
 
     # Rename the local paths to legacy format, to simulate what
-    # we would see when upgrading
-    timeline_dir = env.pageserver.timeline_dir(tenant_id, timeline_id)
-    files_renamed = 0
-    for filename in os.listdir(timeline_dir):
-        path = os.path.join(timeline_dir, filename)
-        log.info(f"Found file {path}")
-        if path.endswith("-v1-00000001"):
-            new_path = path[:-12]
-            os.rename(path, new_path)
-            log.info(f"Renamed {path} -> {new_path}")
-            files_renamed += 1
+    # we would see when upgrading. Do this on both attached and secondary locations, as we will
+    # test the behavior of both.
+    for pageserver in env.pageservers:
+        pageserver.stop()
+        timeline_dir = pageserver.timeline_dir(tenant_id, timeline_id)
+        files_renamed = 0
+        for filename in os.listdir(timeline_dir):
+            path = os.path.join(timeline_dir, filename)
+            log.info(f"Found file {path}")
+            if path.endswith("-v1-00000001"):
+                new_path = path[:-12]
+                os.rename(path, new_path)
+                log.info(f"Renamed {path} -> {new_path}")
+                files_renamed += 1
 
-    assert files_renamed > 0
+        assert files_renamed > 0
 
-    env.pageserver.start()
+        pageserver.start()
 
     workload.validate()
 
     # Assert that there were no on-demand downloads
     assert (
-        env.pageserver.http_client().get_metric_value(
+        attached_pageserver.http_client().get_metric_value(
            "pageserver_remote_ondemand_downloaded_layers_total"
        )
        == 0
    )
+
+    # Do a secondary download and ensure there were no layer downloads
+    secondary_pageserver.http_client().tenant_secondary_download(tenant_id)
+    assert (
+        secondary_pageserver.http_client().get_metric_value(
+            "pageserver_secondary_download_layer_total"
+        )
+        == 0
+    )
+
+    # Check that when we evict and promote one of the legacy-named layers, everything works as
+    # expected
+    local_layers = list(
+        (
+            parse_layer_file_name(path.name),
+            os.path.join(attached_pageserver.timeline_dir(tenant_id, timeline_id), path),
+        )
+        for path in attached_pageserver.list_layers(tenant_id, timeline_id)
+    )
+    (victim_layer_name, victim_path) = local_layers[0]
+    assert os.path.exists(victim_path)
+
+    attached_pageserver.http_client().evict_layer(
+        tenant_id, timeline_id, victim_layer_name.to_str()
+    )
+    assert not os.path.exists(victim_path)
+
+    attached_pageserver.http_client().download_layer(
+        tenant_id, timeline_id, victim_layer_name.to_str()
+    )
+    # We should download into the same local path we started with
+    assert os.path.exists(victim_path)