Compare commits

...

4 Commits

Author SHA1 Message Date
Bojan Serafimov
a4bd82cb81 Merge branch 'layer-stats' of github.com:neondatabase/neon into layer-stats 2023-06-10 10:47:42 -04:00
Bojan Serafimov
c64798956d Address comments 2023-06-10 10:46:28 -04:00
bojanserafimov
39bbaecb03 accept suggestion
Co-authored-by: Joonas Koivunen <joonas@neon.tech>
2023-06-10 10:35:13 -04:00
Bojan Serafimov
26dca374eb Add layer stats cli 2023-06-02 16:46:48 -04:00
3 changed files with 82 additions and 3 deletions

1
Cargo.lock generated
View File

@@ -2530,6 +2530,7 @@ dependencies = [
"bytes",
"clap 4.3.0",
"git-version",
"itertools",
"pageserver",
"postgres_ffi",
"svg_fmt",

View File

@@ -16,3 +16,4 @@ postgres_ffi.workspace = true
utils.workspace = true
svg_fmt.workspace = true
workspace_hack.workspace = true
itertools.workspace = true

View File

@@ -41,9 +41,18 @@ pub(crate) enum LayerCmd {
/// The id from list-layer command
id: usize,
},
/// Output layer statistics
GetStats {
path: PathBuf,
tenant: String,
timeline: String,
/// The id from list-layer command
id: usize,
},
}
fn read_delta_file(path: impl AsRef<Path>) -> Result<()> {
// Return (key, value.len) for all keys, sorted by key.
fn read_delta_file(path: impl AsRef<Path>) -> Result<Vec<(Key, usize)>> {
use pageserver::tenant::blob_io::BlobCursor;
use pageserver::tenant::block_io::BlockReader;
@@ -70,11 +79,48 @@ fn read_delta_file(path: impl AsRef<Path>) -> Result<()> {
},
)?;
let mut cursor = BlockCursor::new(&file);
let mut result = vec![];
for (k, v) in all {
let value = cursor.read_blob(v.pos())?;
println!("key:{} value_len:{}", k, value.len());
result.push((k, value.len()));
}
// TODO(chi): special handling for last key?
Ok(result)
}
// The whole i128 keyspace is partitioned into pre-assigned, fixed 8MB
// segments. Keys are grouped by segment and the total value size per
// segment is reported.
//
// 8MB was picked as the segment size because s3 partial downloads are
// unlikely to be made smaller than 8MB (for cost reasons), so an
// 8MB-granularity summary of layer metadata is enough to generate good
// test data for write amplification tests.
//
// The segment boundaries are fixed and do not depend on which part of
// the keyspace is actually in use.
fn read_delta_segments(path: impl AsRef<Path>) -> Result<Vec<(i128, usize)>> {
    // A page is 8KB, so 1024 consecutive pages make up one 8MB segment.
    fn key_to_segment(key: &Key) -> i128 {
        key.to_i128() >> 10
    }
    // Entries come back sorted by key, so all keys belonging to one
    // segment are adjacent; a single pass can accumulate the totals.
    let mut segments: Vec<(i128, usize)> = Vec::new();
    for (key, len) in read_delta_file(path)? {
        let segment = key_to_segment(&key);
        match segments.last_mut() {
            Some((last, total)) if *last == segment => *total += len,
            _ => segments.push((segment, len)),
        }
    }
    Ok(segments)
}
// Print a per-segment size summary of a delta layer file, one line per
// 8MB segment.
// TODO write in some compressed binary format
fn summarize_delta_file(path: impl AsRef<Path>) -> Result<()> {
    read_delta_segments(path)?
        .into_iter()
        .for_each(|(segment, size)| println!("segment:{} size:{}", segment, size));
    Ok(())
}
@@ -153,7 +199,38 @@ pub(crate) fn main(cmd: &LayerCmd) -> Result<()> {
);
if layer_file.is_delta {
read_delta_file(layer.path())?;
for (k, len) in read_delta_file(layer.path())? {
println!("key:{} value_len:{}", k, len);
}
} else {
anyhow::bail!("not supported yet :(");
}
break;
}
idx += 1;
}
}
}
LayerCmd::GetStats {
path,
tenant,
timeline,
id,
} => {
let timeline_path = path
.join("tenants")
.join(tenant)
.join("timelines")
.join(timeline);
let mut idx = 0;
for layer in fs::read_dir(timeline_path)? {
let layer = layer?;
if let Some(layer_file) = parse_filename(&layer.file_name().into_string().unwrap())
{
if *id == idx {
if layer_file.is_delta {
summarize_delta_file(layer.path())?;
} else {
anyhow::bail!("not supported yet :(");
}