From 2d37db234ac73a05099edd20e96c6492ab8a4063 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Tue, 4 Jun 2024 15:53:15 +0200 Subject: [PATCH] Add mode to compare multiple files from a tenant --- pageserver/ctl/src/layers.rs | 81 ++++++++++++++++++++++++++++++++---- 1 file changed, 73 insertions(+), 8 deletions(-) diff --git a/pageserver/ctl/src/layers.rs b/pageserver/ctl/src/layers.rs index 340718ef1a..1ca396cbbe 100644 --- a/pageserver/ctl/src/layers.rs +++ b/pageserver/ctl/src/layers.rs @@ -1,4 +1,6 @@ +use std::num::NonZeroU32; use std::path::{Path, PathBuf}; +use std::str::FromStr; use anyhow::Result; use camino::{Utf8Path, Utf8PathBuf}; @@ -8,7 +10,7 @@ use pageserver::task_mgr::TaskKind; use pageserver::tenant::block_io::BlockCursor; use pageserver::tenant::disk_btree::DiskBtreeReader; use pageserver::tenant::storage_layer::delta_layer::{BlobRef, Summary}; -use pageserver::tenant::storage_layer::{delta_layer, image_layer}; +use pageserver::tenant::storage_layer::{delta_layer, image_layer, LayerName}; use pageserver::tenant::storage_layer::{DeltaLayer, ImageLayer}; use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME}; use pageserver::{page_cache, virtual_file}; @@ -20,7 +22,9 @@ use pageserver::{ }, virtual_file::VirtualFile, }; +use remote_storage::{ListingMode, RemotePath, RemoteStorageConfig}; use std::fs; +use tokio_util::sync::CancellationToken; use utils::bin_ser::BeSer; use utils::id::{TenantId, TimelineId}; @@ -57,7 +61,10 @@ pub(crate) enum LayerCmd { }, Compress { dest_path: Utf8PathBuf, - layer_file_path: Utf8PathBuf, + layer_file_path: Option, + tenant_remote_prefix: Option, + tenant_remote_config: Option, + layers_dir: Option, }, } @@ -247,18 +254,76 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> { LayerCmd::Compress { dest_path, layer_file_path, + tenant_remote_prefix, + tenant_remote_config, + layers_dir, } => { pageserver::virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs); pageserver::page_cache::init(100); let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error); - let stats = - ImageLayer::compression_statistics(dest_path, layer_file_path, &ctx).await?; - println!( - "Statistics: {stats:#?}\n{}", - serde_json::to_string(&stats).unwrap() - ); + if let Some(layer_file_path) = layer_file_path { + let stats = + ImageLayer::compression_statistics(dest_path, layer_file_path, &ctx).await?; + println!( + "Statistics: {stats:#?}\n{}", + serde_json::to_string(&stats).unwrap() + ); + } else if let ( + Some(tenant_remote_prefix), + Some(tenant_remote_config), + Some(layers_dir), + ) = (tenant_remote_prefix, tenant_remote_config, layers_dir) + { + let toml_document = toml_edit::Document::from_str(&tenant_remote_config)?; + let toml_item = toml_document + .get("remote_storage") + .expect("need remote_storage"); + let config = RemoteStorageConfig::from_toml(toml_item)?.expect("incomplete config"); + let storage = remote_storage::GenericRemoteStorage::from_config(&config)?; + + let cancel = CancellationToken::new(); + let path = RemotePath::from_string(&tenant_remote_prefix)?; + let max_files = NonZeroU32::new(8000); + let files_list = storage + .list(Some(&path), ListingMode::NoDelimiter, max_files, &cancel) + .await?; + + for file_key in files_list.keys.iter() { + let Some(file_name) = file_key.object_name() else { + continue; + }; + let Ok(LayerName::Image(_layer_file_name)) = LayerName::from_str(file_name) + else { + // Skipping because it's either not a layer or a delta layer + continue; + }; + let json_file_path = layers_dir.join(format!("{file_name}.json")); + if tokio::fs::try_exists(&json_file_path).await? { + // If we have already created a report for the layer, skip it. + continue; + } + let local_layer_path = layers_dir.join(file_name); + let download = storage.download(file_key, &cancel).await?; + let mut dest_layer_file = tokio::fs::File::create(&local_layer_path).await?; + let mut body = tokio_util::io::StreamReader::new(download.download_stream); + let _size = tokio::io::copy_buf(&mut body, &mut dest_layer_file).await?; + let stats = ImageLayer::compression_statistics( + &local_layer_path, + &local_layer_path, + &ctx, + ) + .await?; + + let stats_str = serde_json::to_string(&stats).unwrap(); + tokio::fs::write(json_file_path, stats_str).await?; + println!("Statistics for {file_name}: {stats:#?}\n"); + } + } else { + anyhow::bail!("No tenant dir or remote config or layers dir specified"); + } + return Ok(()); } }