refactor(rtc): remove the duplicate IndexLayerMetadata (#7860)

Once upon a time, we used to have duplicated types for runtime IndexPart
and whatever we stored. Because of the serde fixes in #5335 we have no
need for duplicated IndexPart type anymore, but the `IndexLayerMetadata`
stayed.

- remove the type
- remove LayerFileMetadata::file_size() in favor of direct field access

Split off from #7833. Cc: #3072.
This commit is contained in:
Joonas Koivunen
2024-05-23 23:24:31 +03:00
committed by GitHub
parent 6b3164269c
commit 7cf726e36e
14 changed files with 65 additions and 115 deletions

View File

@@ -380,8 +380,8 @@ impl interface::CompactionLayer<Key> for MockLayer {
}
fn file_size(&self) -> u64 {
match self {
MockLayer::Delta(this) => this.file_size(),
MockLayer::Image(this) => this.file_size(),
MockLayer::Delta(this) => this.file_size,
MockLayer::Image(this) => this.file_size,
}
}
fn short_id(&self) -> String {

View File

@@ -2,7 +2,7 @@ use std::collections::HashMap;
use anyhow::Context;
use camino::Utf8PathBuf;
use pageserver::tenant::remote_timeline_client::index::IndexLayerMetadata;
use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata;
use pageserver::tenant::storage_layer::LayerName;
use pageserver::tenant::{metadata::TimelineMetadata, IndexPart};
use utils::lsn::Lsn;
@@ -19,7 +19,7 @@ pub(crate) async fn main(cmd: &IndexPartCmd) -> anyhow::Result<()> {
let des: IndexPart = IndexPart::from_s3_bytes(&bytes).context("deserialize")?;
#[derive(serde::Serialize)]
struct Output<'a> {
layer_metadata: &'a HashMap<LayerName, IndexLayerMetadata>,
layer_metadata: &'a HashMap<LayerName, LayerFileMetadata>,
disk_consistent_lsn: Lsn,
timeline_metadata: &'a TimelineMetadata,
}

View File

@@ -534,7 +534,7 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
});
}
EvictionLayer::Secondary(layer) => {
let file_size = layer.metadata.file_size();
let file_size = layer.metadata.file_size;
js.spawn(async move {
layer
@@ -641,7 +641,7 @@ impl EvictionLayer {
pub(crate) fn get_file_size(&self) -> u64 {
match self {
Self::Attached(l) => l.layer_desc().file_size,
Self::Secondary(sl) => sl.metadata.file_size(),
Self::Secondary(sl) => sl.metadata.file_size,
}
}
}

View File

@@ -1192,7 +1192,7 @@ impl RemoteTimelineClient {
&self.storage_impl,
uploaded.local_path(),
&remote_path,
uploaded.metadata().file_size(),
uploaded.metadata().file_size,
cancel,
)
.await
@@ -1573,7 +1573,7 @@ impl RemoteTimelineClient {
&self.storage_impl,
local_path,
&remote_path,
layer_metadata.file_size(),
layer_metadata.file_size,
&self.cancel,
)
.measure_remote_op(
@@ -1768,7 +1768,7 @@ impl RemoteTimelineClient {
UploadOp::UploadLayer(_, m) => (
RemoteOpFileKind::Layer,
RemoteOpKind::Upload,
RemoteTimelineClientMetricsCallTrackSize::Bytes(m.file_size()),
RemoteTimelineClientMetricsCallTrackSize::Bytes(m.file_size),
),
UploadOp::UploadMetadata(_, _) => (
RemoteOpFileKind::Index,

View File

@@ -84,7 +84,7 @@ pub async fn download_layer_file<'a>(
)
.await?;
let expected = layer_metadata.file_size();
let expected = layer_metadata.file_size;
if expected != bytes_amount {
return Err(DownloadError::Other(anyhow!(
"According to layer file metadata should have downloaded {expected} bytes but downloaded {bytes_amount} bytes into file {temp_file_path:?}",

View File

@@ -17,46 +17,6 @@ use pageserver_api::shard::ShardIndex;
use utils::lsn::Lsn;
/// Metadata gathered for each of the layer files.
///
/// Fields have to be `Option`s because remote [`IndexPart`]'s can be from different version, which
/// might have less or more metadata depending if upgrading or rolling back an upgrade.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
//#[cfg_attr(test, derive(Default))]
pub struct LayerFileMetadata {
file_size: u64,
pub(crate) generation: Generation,
pub(crate) shard: ShardIndex,
}
impl From<&'_ IndexLayerMetadata> for LayerFileMetadata {
fn from(other: &IndexLayerMetadata) -> Self {
LayerFileMetadata {
file_size: other.file_size,
generation: other.generation,
shard: other.shard,
}
}
}
impl LayerFileMetadata {
pub fn new(file_size: u64, generation: Generation, shard: ShardIndex) -> Self {
LayerFileMetadata {
file_size,
generation,
shard,
}
}
pub fn file_size(&self) -> u64 {
self.file_size
}
}
// TODO seems like another part of the remote storage file format
// compatibility issue, see https://github.com/neondatabase/neon/issues/3072
/// In-memory representation of an `index_part.json` file
///
/// Contains the data about all files in the timeline, present remotely and its metadata.
@@ -77,7 +37,7 @@ pub struct IndexPart {
///
/// Older versions of `IndexPart` will not have this property or have only a part of metadata
/// that latest version stores.
pub layer_metadata: HashMap<LayerName, IndexLayerMetadata>,
pub layer_metadata: HashMap<LayerName, LayerFileMetadata>,
// 'disk_consistent_lsn' is a copy of the 'disk_consistent_lsn' in the metadata.
// It's duplicated for convenience when reading the serialized structure, but is
@@ -127,10 +87,7 @@ impl IndexPart {
lineage: Lineage,
last_aux_file_policy: Option<AuxFilePolicy>,
) -> Self {
let layer_metadata = layers_and_metadata
.iter()
.map(|(k, v)| (k.to_owned(), IndexLayerMetadata::from(v)))
.collect();
let layer_metadata = layers_and_metadata.clone();
Self {
version: Self::LATEST_VERSION,
@@ -194,9 +151,12 @@ impl From<&UploadQueueInitialized> for IndexPart {
}
}
/// Serialized form of [`LayerFileMetadata`].
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub struct IndexLayerMetadata {
/// Metadata gathered for each of the layer files.
///
/// Fields have to be `Option`s because remote [`IndexPart`]'s can be from different version, which
/// might have less or more metadata depending if upgrading or rolling back an upgrade.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct LayerFileMetadata {
pub file_size: u64,
#[serde(default = "Generation::none")]
@@ -208,12 +168,12 @@ pub struct IndexLayerMetadata {
pub shard: ShardIndex,
}
impl From<&LayerFileMetadata> for IndexLayerMetadata {
fn from(other: &LayerFileMetadata) -> Self {
IndexLayerMetadata {
file_size: other.file_size,
generation: other.generation,
shard: other.shard,
impl LayerFileMetadata {
pub fn new(file_size: u64, generation: Generation, shard: ShardIndex) -> Self {
LayerFileMetadata {
file_size,
generation,
shard,
}
}
}
@@ -307,12 +267,12 @@ mod tests {
// note this is not verified, could be anything, but exists for humans debugging.. could be the git version instead?
version: 1,
layer_metadata: HashMap::from([
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata {
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
file_size: 25600000,
generation: Generation::none(),
shard: ShardIndex::unsharded()
}),
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), IndexLayerMetadata {
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
// serde_json should always parse this but this might be a double with jq for
// example.
file_size: 9007199254741001,
@@ -349,12 +309,12 @@ mod tests {
// note this is not verified, could be anything, but exists for humans debugging.. could be the git version instead?
version: 1,
layer_metadata: HashMap::from([
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata {
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
file_size: 25600000,
generation: Generation::none(),
shard: ShardIndex::unsharded()
}),
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), IndexLayerMetadata {
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
// serde_json should always parse this but this might be a double with jq for
// example.
file_size: 9007199254741001,
@@ -392,12 +352,12 @@ mod tests {
// note this is not verified, could be anything, but exists for humans debugging.. could be the git version instead?
version: 2,
layer_metadata: HashMap::from([
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata {
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
file_size: 25600000,
generation: Generation::none(),
shard: ShardIndex::unsharded()
}),
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), IndexLayerMetadata {
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
// serde_json should always parse this but this might be a double with jq for
// example.
file_size: 9007199254741001,
@@ -480,12 +440,12 @@ mod tests {
let expected = IndexPart {
version: 4,
layer_metadata: HashMap::from([
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata {
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
file_size: 25600000,
generation: Generation::none(),
shard: ShardIndex::unsharded()
}),
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), IndexLayerMetadata {
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
// serde_json should always parse this but this might be a double with jq for
// example.
file_size: 9007199254741001,
@@ -522,12 +482,12 @@ mod tests {
let expected = IndexPart {
version: 5,
layer_metadata: HashMap::from([
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000014EF420-00000000014EF499".parse().unwrap(), IndexLayerMetadata {
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000014EF420-00000000014EF499".parse().unwrap(), LayerFileMetadata {
file_size: 23289856,
generation: Generation::new(1),
shard: ShardIndex::unsharded(),
}),
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000014EF499-00000000015A7619".parse().unwrap(), IndexLayerMetadata {
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000014EF499-00000000015A7619".parse().unwrap(), LayerFileMetadata {
file_size: 1015808,
generation: Generation::new(1),
shard: ShardIndex::unsharded(),
@@ -569,12 +529,12 @@ mod tests {
let expected = IndexPart {
version: 6,
layer_metadata: HashMap::from([
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata {
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
file_size: 25600000,
generation: Generation::none(),
shard: ShardIndex::unsharded()
}),
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), IndexLayerMetadata {
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
// serde_json should always parse this but this might be a double with jq for
// example.
file_size: 9007199254741001,

View File

@@ -716,7 +716,7 @@ impl<'a> TenantDownloader<'a> {
let mut layer_byte_count: u64 = timeline_state
.on_disk_layers
.values()
.map(|l| l.metadata.file_size())
.map(|l| l.metadata.file_size)
.sum();
// Remove on-disk layers that are no longer present in heatmap
@@ -727,7 +727,7 @@ impl<'a> TenantDownloader<'a> {
.get(layer_file_name)
.unwrap()
.metadata
.file_size();
.file_size;
let local_path = local_layer_path(
self.conf,
@@ -886,9 +886,7 @@ impl<'a> TenantDownloader<'a> {
}
}
if on_disk.metadata != LayerFileMetadata::from(&layer.metadata)
|| on_disk.access_time != layer.access_time
{
if on_disk.metadata != layer.metadata || on_disk.access_time != layer.access_time {
// We already have this layer on disk. Update its access time.
tracing::debug!(
"Access time updated for layer {}: {} -> {}",
@@ -979,7 +977,7 @@ impl<'a> TenantDownloader<'a> {
tenant_shard_id,
&timeline.timeline_id,
t.name,
LayerFileMetadata::from(&t.metadata),
t.metadata.clone(),
t.access_time,
local_path,
));
@@ -1024,7 +1022,7 @@ impl<'a> TenantDownloader<'a> {
*tenant_shard_id,
*timeline_id,
&layer.name,
&LayerFileMetadata::from(&layer.metadata),
&layer.metadata,
&local_path,
&self.secondary_state.cancel,
ctx,
@@ -1185,7 +1183,7 @@ async fn init_timeline_state(
tenant_shard_id,
&heatmap.timeline_id,
name,
LayerFileMetadata::from(&remote_meta.metadata),
remote_meta.metadata.clone(),
remote_meta.access_time,
file_path,
),

View File

@@ -1,6 +1,6 @@
use std::time::SystemTime;
use crate::tenant::{remote_timeline_client::index::IndexLayerMetadata, storage_layer::LayerName};
use crate::tenant::{remote_timeline_client::index::LayerFileMetadata, storage_layer::LayerName};
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr, TimestampSeconds};
@@ -38,7 +38,7 @@ pub(crate) struct HeatMapTimeline {
#[derive(Serialize, Deserialize)]
pub(crate) struct HeatMapLayer {
pub(super) name: LayerName,
pub(super) metadata: IndexLayerMetadata,
pub(super) metadata: LayerFileMetadata,
#[serde_as(as = "TimestampSeconds<i64>")]
pub(super) access_time: SystemTime,
@@ -49,7 +49,7 @@ pub(crate) struct HeatMapLayer {
impl HeatMapLayer {
pub(crate) fn new(
name: LayerName,
metadata: IndexLayerMetadata,
metadata: LayerFileMetadata,
access_time: SystemTime,
) -> Self {
Self {

View File

@@ -161,7 +161,7 @@ impl Layer {
timeline.tenant_shard_id,
timeline.timeline_id,
file_name,
metadata.file_size(),
metadata.file_size,
);
let access_stats = LayerAccessStats::for_loading_layer(LayerResidenceStatus::Evicted);
@@ -194,7 +194,7 @@ impl Layer {
timeline.tenant_shard_id,
timeline.timeline_id,
file_name,
metadata.file_size(),
metadata.file_size,
);
let access_stats = LayerAccessStats::for_loading_layer(LayerResidenceStatus::Resident);
@@ -227,7 +227,7 @@ impl Layer {
timeline
.metrics
.resident_physical_size_add(metadata.file_size());
.resident_physical_size_add(metadata.file_size);
ResidentLayer { downloaded, owner }
}

View File

@@ -1424,7 +1424,7 @@ impl Timeline {
let layer_map = guard.layer_map();
let mut size = 0;
for l in layer_map.iter_historic_layers() {
size += l.file_size();
size += l.file_size;
}
size
}
@@ -2516,7 +2516,7 @@ impl Timeline {
Ok(UseRemote { local, remote }) => {
// Remote is authoritative, but we may still choose to retain
// the local file if the contents appear to match
if local.metadata.file_size() == remote.file_size() {
if local.metadata.file_size == remote.file_size {
// Use the local file, but take the remote metadata so that we pick up
// the correct generation.
UseLocal(LocalLayerFileMetadata {
@@ -2556,7 +2556,7 @@ impl Timeline {
let layer = match decision {
UseLocal(local) => {
total_physical_size += local.metadata.file_size();
total_physical_size += local.metadata.file_size;
Layer::for_resident(conf, &this, local.local_path, name, local.metadata)
.drop_eviction_guard()
}
@@ -3071,7 +3071,7 @@ impl Timeline {
HeatMapLayer::new(
layer.layer_desc().layer_name(),
(&layer.metadata()).into(),
layer.metadata(),
last_activity_ts,
)
});

View File

@@ -157,7 +157,7 @@ pub(super) fn reconcile(
.map(|ip| ip.layer_metadata.iter())
.into_iter()
.flatten()
.map(|(name, metadata)| (name, LayerFileMetadata::from(metadata)))
.map(|(name, metadata)| (name, metadata.clone()))
.for_each(|(name, metadata)| {
if let Some(existing) = discovered.get_mut(name) {
existing.1 = Some(metadata);
@@ -200,8 +200,8 @@ pub(super) fn cleanup_local_file_for_remote(
local: &LocalLayerFileMetadata,
remote: &LayerFileMetadata,
) -> anyhow::Result<()> {
let local_size = local.metadata.file_size();
let remote_size = remote.file_size();
let local_size = local.metadata.file_size;
let remote_size = remote.file_size;
let path = &local.local_path;
let file_name = path.file_name().expect("must be file path");

View File

@@ -213,10 +213,7 @@ impl UploadQueue {
let mut files = HashMap::with_capacity(index_part.layer_metadata.len());
for (layer_name, layer_metadata) in &index_part.layer_metadata {
files.insert(
layer_name.to_owned(),
LayerFileMetadata::from(layer_metadata),
);
files.insert(layer_name.to_owned(), layer_metadata.clone());
}
info!(
@@ -322,9 +319,7 @@ impl std::fmt::Display for UploadOp {
write!(
f,
"UploadLayer({}, size={:?}, gen={:?})",
layer,
metadata.file_size(),
metadata.generation
layer, metadata.file_size, metadata.generation
)
}
UploadOp::UploadMetadata(_, lsn) => {

View File

@@ -2,7 +2,7 @@ use std::collections::{HashMap, HashSet};
use anyhow::Context;
use aws_sdk_s3::{types::ObjectIdentifier, Client};
use pageserver::tenant::remote_timeline_client::index::IndexLayerMetadata;
use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata;
use pageserver_api::shard::ShardIndex;
use tracing::{error, info, warn};
use utils::generation::Generation;
@@ -208,7 +208,7 @@ impl TenantObjectListing {
&mut self,
timeline_id: TimelineId,
layer_file: &LayerName,
metadata: &IndexLayerMetadata,
metadata: &LayerFileMetadata,
) -> bool {
let Some(shard_tl) = self.shard_timelines.get_mut(&(metadata.shard, timeline_id)) else {
return false;

View File

@@ -11,7 +11,7 @@ use async_stream::stream;
use aws_sdk_s3::Client;
use camino::Utf8PathBuf;
use futures::{StreamExt, TryStreamExt};
use pageserver::tenant::remote_timeline_client::index::IndexLayerMetadata;
use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata;
use pageserver::tenant::storage_layer::LayerName;
use pageserver::tenant::IndexPart;
use pageserver_api::shard::TenantShardId;
@@ -49,8 +49,8 @@ impl SnapshotDownloader {
&self,
ttid: TenantShardTimelineId,
layer_name: LayerName,
layer_metadata: IndexLayerMetadata,
) -> anyhow::Result<(LayerName, IndexLayerMetadata)> {
layer_metadata: LayerFileMetadata,
) -> anyhow::Result<(LayerName, LayerFileMetadata)> {
// Note this is local as in a local copy of S3 data, not local as in the pageserver's local format. They use
// different layer names (remote-style has the generation suffix)
let local_path = self.output_path.join(format!(
@@ -110,7 +110,7 @@ impl SnapshotDownloader {
async fn download_layers(
&self,
ttid: TenantShardTimelineId,
layers: Vec<(LayerName, IndexLayerMetadata)>,
layers: Vec<(LayerName, LayerFileMetadata)>,
) -> anyhow::Result<()> {
let layer_count = layers.len();
tracing::info!("Downloading {} layers for timeline {ttid}...", layer_count);
@@ -161,10 +161,7 @@ impl SnapshotDownloader {
ttid: TenantShardTimelineId,
index_part: Box<IndexPart>,
index_part_generation: Generation,
ancestor_layers: &mut HashMap<
TenantShardTimelineId,
HashMap<LayerName, IndexLayerMetadata>,
>,
ancestor_layers: &mut HashMap<TenantShardTimelineId, HashMap<LayerName, LayerFileMetadata>>,
) -> anyhow::Result<()> {
let index_bytes = serde_json::to_string(&index_part).unwrap();
@@ -234,7 +231,7 @@ impl SnapshotDownloader {
// happen if this tenant has been split at some point)
let mut ancestor_layers: HashMap<
TenantShardTimelineId,
HashMap<LayerName, IndexLayerMetadata>,
HashMap<LayerName, LayerFileMetadata>,
> = Default::default();
for shard in shards.into_iter().filter(|s| s.shard_count == shard_count) {