From 47664e40d452a4e3f771e6c6b4905f02d6c710a2 Mon Sep 17 00:00:00 2001 From: David Freifeld Date: Thu, 26 Jun 2025 16:00:33 -0700 Subject: [PATCH] Initial work in visualizing properties of hashmap --- Cargo.lock | 299 ++++++++++++++++++++++++++++++ libs/neon-shmem/Cargo.toml | 8 + libs/neon-shmem/src/hash.rs | 26 ++- libs/neon-shmem/src/hmap_stats.rs | 139 ++++++++++++++ 4 files changed, 471 insertions(+), 1 deletion(-) create mode 100644 libs/neon-shmem/src/hmap_stats.rs diff --git a/Cargo.lock b/Cargo.lock index 4fd5f5802b..c407806b3d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1336,6 +1336,12 @@ dependencies = [ "cc", ] +[[package]] +name = "color_quant" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" + [[package]] name = "colorchoice" version = "1.0.4" @@ -1595,6 +1601,42 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "core-graphics" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c07782be35f9e1140080c6b96f0d44b739e2278479f64e02fdab4e32dfd8b081" +dependencies = [ + "bitflags 1.3.2", + "core-foundation 0.9.4", + "core-graphics-types", + "foreign-types", + "libc", +] + +[[package]] +name = "core-graphics-types" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45390e6114f68f718cc7a830514a96f903cccd70d02a8f6d9f643ac4ba45afaf" +dependencies = [ + "bitflags 1.3.2", + "core-foundation 0.9.4", + "libc", +] + +[[package]] +name = "core-text" +version = "20.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9d2790b5c08465d49f8dc05c8bcae9fea467855947db39b0f8145c091aaced5" +dependencies = [ + "core-foundation 0.9.4", + "core-graphics", + "foreign-types", + "libc", +] + [[package]] name = 
"cpp_demangle" version = "0.4.4" @@ -2047,6 +2089,27 @@ dependencies = [ "subtle", ] +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.59.0", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -2058,6 +2121,15 @@ dependencies = [ "syn", ] +[[package]] +name = "dlib" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "330c60081dcc4c72131f8eb70510f1ac07223e5d4163db481a04a0befcffa412" +dependencies = [ + "libloading", +] + [[package]] name = "dlv-list" version = "0.5.2" @@ -2087,6 +2159,18 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" +[[package]] +name = "dwrote" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfe1f192fcce01590bd8d839aca53ce0d11d803bf291b2a6c4ad925a8f0024be" +dependencies = [ + "lazy_static", + "libc", + "winapi", + "wio", +] + [[package]] name = "dyn-clone" version = "1.0.19" @@ -2377,6 +2461,15 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "fdeflate" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e6853b52649d4ac5c0bd02320cddc5ba956bdb407c4b75a2c6b75bf51500f8c" +dependencies = [ + "simd-adler32", +] + [[package]] name = "ff" version = "0.12.1" @@ -2455,6 +2548,12 @@ dependencies = [ "miniz_oxide", ] 
+[[package]] +name = "float-ord" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ce81f49ae8a0482e4c55ea62ebbd7e5a686af544c00b9d090bba3ff9be97b3d" + [[package]] name = "fnv" version = "1.0.7" @@ -2467,6 +2566,58 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "font-kit" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c7e611d49285d4c4b2e1727b72cf05353558885cc5252f93707b845dfcaf3d3" +dependencies = [ + "bitflags 2.9.1", + "byteorder", + "core-foundation 0.9.4", + "core-graphics", + "core-text", + "dirs", + "dwrote", + "float-ord", + "freetype-sys", + "lazy_static", + "libc", + "log", + "pathfinder_geometry", + "pathfinder_simd", + "walkdir", + "winapi", + "yeslogic-fontconfig-sys", +] + +[[package]] +name = "foreign-types" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965" +dependencies = [ + "foreign-types-macros", + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-macros" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "foreign-types-shared" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -2497,6 +2648,17 @@ dependencies = [ "tokio-util", ] +[[package]] +name = "freetype-sys" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7edc5b9669349acfda99533e9e0bcf26a51862ab43b08ee7745c55d28eb134" 
+dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "fs_extra" version = "1.3.0" @@ -2687,6 +2849,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "gif" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80792593675e051cf94a4b111980da2ba60d4a83e43e0048c5693baab3977045" +dependencies = [ + "color_quant", + "weezl", +] + [[package]] name = "gimli" version = "0.31.1" @@ -3371,6 +3543,20 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "image" +version = "0.24.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5690139d2f55868e080017335e4b94cb7414274c74f1669c84fb5feba2c9f69d" +dependencies = [ + "bytemuck", + "byteorder", + "color_quant", + "jpeg-decoder", + "num-traits", + "png", +] + [[package]] name = "indexmap" version = "1.9.3" @@ -3655,6 +3841,12 @@ dependencies = [ "zeroize", ] +[[package]] +name = "jpeg-decoder" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00810f1d8b74be64b13dbf3db89ac67740615d6c891f0e7b6179326533011a07" + [[package]] name = "js-sys" version = "0.3.77" @@ -3998,6 +4190,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] @@ -4038,6 +4231,7 @@ dependencies = [ "foldhash", "hashbrown 0.15.4 (git+https://github.com/quantumish/hashbrown.git?rev=6610e6d)", "nix 0.30.1", + "plotters", "rand 0.9.1", "rand_distr 0.5.1", "rustc-hash 2.1.1", @@ -4406,6 +4600,12 @@ dependencies = [ "tracing", ] +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "ordered-float" version = "2.10.1" @@ -4842,6 +5042,25 @@ version = "1.0.15" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pathfinder_geometry" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b7e7b4ea703700ce73ebf128e1450eb69c3a8329199ffbfb9b2a0418e5ad3" +dependencies = [ + "log", + "pathfinder_simd", +] + +[[package]] +name = "pathfinder_simd" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf9027960355bf3afff9841918474a81a5f972ac6d226d518060bba758b5ad57" +dependencies = [ + "rustc_version", +] + [[package]] name = "pbkdf2" version = "0.12.2" @@ -5005,9 +5224,16 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" dependencies = [ + "chrono", + "font-kit", + "image", + "lazy_static", "num-traits", + "pathfinder_geometry", "plotters-backend", + "plotters-bitmap", "plotters-svg", + "ttf-parser", "wasm-bindgen", "web-sys", ] @@ -5018,6 +5244,17 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" +[[package]] +name = "plotters-bitmap" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72ce181e3f6bf82d6c1dc569103ca7b1bd964c60ba03d7e6cdfbb3e3eb7f7405" +dependencies = [ + "gif", + "image", + "plotters-backend", +] + [[package]] name = "plotters-svg" version = "0.3.7" @@ -5027,6 +5264,19 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "png" +version = "0.17.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82151a2fc869e011c153adc57cf2789ccb8d9906ce52c0b39a6b5697749d7526" +dependencies = [ + "bitflags 1.3.2", + "crc32fast", + "fdeflate", + "flate2", + "miniz_oxide", +] + [[package]] name = "polonius-the-crab" version = "0.4.2" 
@@ -5914,6 +6164,17 @@ dependencies = [ "bitflags 2.9.1", ] +[[package]] +name = "redox_users" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b" +dependencies = [ + "getrandom 0.2.16", + "libredox", + "thiserror 2.0.12", +] + [[package]] name = "regex" version = "1.11.1" @@ -6986,6 +7247,12 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "simd-adler32" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" + [[package]] name = "simple_asn1" version = "0.6.3" @@ -8177,6 +8444,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "ttf-parser" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17f77d76d837a7830fe1d4f12b7b4ba4192c1888001c7164257e4bc6d21d96b4" + [[package]] name = "tungstenite" version = "0.21.0" @@ -8715,6 +8988,12 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "weezl" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a751b3277700db47d3e574514de2eced5e54dc8a5436a3bf7a0b248b2cee16f3" + [[package]] name = "which" version = "4.4.2" @@ -9067,6 +9346,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "wio" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d129932f4644ac2396cb456385cbf9e63b5b30c6e8dc4820bdca4eb082037a5" +dependencies = [ + "winapi", +] + [[package]] name = "wit-bindgen-rt" version = "0.39.0" @@ -9252,6 +9540,17 @@ dependencies = [ "time", ] +[[package]] +name = "yeslogic-fontconfig-sys" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"503a066b4c037c440169d995b869046827dbc71263f6e8f3be6d77d4f3229dbd"
+dependencies = [
+ "dlib",
+ "once_cell",
+ "pkg-config",
+]
+
 [[package]]
 name = "yoke"
 version = "0.8.0"
diff --git a/libs/neon-shmem/Cargo.toml b/libs/neon-shmem/Cargo.toml
index 1c47ffed37..3a22ce6c8b 100644
--- a/libs/neon-shmem/Cargo.toml
+++ b/libs/neon-shmem/Cargo.toml
@@ -10,6 +10,7 @@ nix.workspace = true
 workspace_hack = { version = "0.1", path = "../../workspace_hack" }
 rustc-hash = { version = "2.1.1" }
 rand = "0.9.1"
+plotters = { version = "0.3.7", optional = true }
 
 [dev-dependencies]
 criterion = { workspace = true, features = ["html_reports"] }
@@ -21,6 +22,7 @@ seahash = "4.1.0"
 hashbrown = { git = "https://github.com/quantumish/hashbrown.git", rev = "6610e6d" }
 foldhash = "0.1.5"
 
+
 [target.'cfg(target_os = "macos")'.dependencies]
 tempfile = "3.14.0"
 
@@ -28,3 +30,9 @@ tempfile = "3.14.0"
 name = "hmap_resize"
 harness = false
 
+[[bin]]
+name = "hmap_stats"
+path = "src/hmap_stats.rs"
+
+[features]
+stats = ["dep:plotters"]
diff --git a/libs/neon-shmem/src/hash.rs b/libs/neon-shmem/src/hash.rs
index 36fbb1112c..6cc641814a 100644
--- a/libs/neon-shmem/src/hash.rs
+++ b/libs/neon-shmem/src/hash.rs
@@ -490,5 +490,40 @@ where
         inner.alloc_limit = INVALID_POS;
 
         Ok(())
     }
+
+    /// Returns the number of slots in the bucket dictionary.
+    #[cfg(feature = "stats")]
+    pub fn dict_len(&self) -> usize {
+        // SAFETY: NOTE(review): relies on `shared_ptr` pointing at an initialized map that
+        // outlives `self` -- confirm. Only a shared reference is created; nothing is mutated.
+        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
+        map.inner.dictionary.len()
+    }
+
+    /// Measures the length of every bucket chain.
+    ///
+    /// Returns `(chains, longest)`: `chains` holds one `(dictionary index, chain length)`
+    /// pair per dictionary slot, in slot order, and `longest` is the largest chain length
+    /// found (0 when every chain is empty).
+    #[cfg(feature = "stats")]
+    pub fn chain_distribution(&self) -> (Vec<(usize, usize)>, usize) {
+        // SAFETY: NOTE(review): same assumption about `shared_ptr` as in `dict_len` --
+        // confirm. The walk below only reads, so a shared reference is sufficient.
+        let map = unsafe { self.shared_ptr.as_ref() }.unwrap();
+        let mut out = Vec::with_capacity(map.inner.dictionary.len());
+        let mut max = 0;
+        for (i, head) in map.inner.dictionary.iter().enumerate() {
+            // Follow the `next` links from this slot until the end-of-chain marker.
+            let mut curr = *head;
+            let mut len = 0;
+            while curr != INVALID_POS {
+                curr = map.inner.buckets[curr as usize].next;
+                len += 1;
+            }
+            out.push((i, len));
+            max = max.max(len);
+        }
+        (out, max)
+    }
 }
diff --git a/libs/neon-shmem/src/hmap_stats.rs b/libs/neon-shmem/src/hmap_stats.rs
new file mode 100644
index 
0000000000..9e55ad1f05
--- /dev/null
+++ b/libs/neon-shmem/src/hmap_stats.rs
@@ -0,0 +1,149 @@
+//! Fills a `neon_shmem` hash map with random keys and plots how long the resulting bucket
+//! chains are (written to `chain_distr.png`).
+//!
+//! Plotting needs the `stats` feature; without it the binary only prints a hint.
+
+// Without `stats` only the fallback `main` is compiled, so the helpers below are unused.
+#![cfg_attr(not(feature = "stats"), allow(dead_code, unused_imports))]
+
+use neon_shmem::hash::HashMapAccess;
+use neon_shmem::hash::HashMapInit;
+use neon_shmem::hash::entry::Entry;
+use rand::prelude::*;
+use rand::distr::{Distribution, StandardUniform};
+// `plotters` is an optional dependency enabled by `stats`; importing it unconditionally
+// would break the build of the fallback `main`.
+#[cfg(feature = "stats")]
+use plotters::prelude::*;
+
+/// Key type used to fill the map.
+#[derive(Clone, Debug, Hash, Eq, PartialEq)]
+#[repr(C)]
+pub struct FileCacheKey {
+    pub _spc_id: u32,
+    pub _db_id: u32,
+    pub _rel_number: u32,
+    pub _fork_num: u32,
+    pub _block_num: u32,
+}
+
+impl Distribution<FileCacheKey> for StandardUniform {
+    // questionable, but doesn't need to be good randomness
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> FileCacheKey {
+        FileCacheKey {
+            _spc_id: rng.random(),
+            _db_id: rng.random(),
+            _rel_number: rng.random(),
+            _fork_num: rng.random(),
+            _block_num: rng.random(),
+        }
+    }
+}
+
+/// Value type stored in the map; this binary always creates it via [`FileCacheEntry::dummy`].
+#[derive(Clone, Debug)]
+#[repr(C)]
+pub struct FileCacheEntry {
+    pub _offset: u32,
+    pub _access_count: u32,
+    pub _prev: *mut FileCacheEntry,
+    pub _next: *mut FileCacheEntry,
+    pub _state: [u32; 8],
+}
+
+impl FileCacheEntry {
+    /// Builds a placeholder entry: zeroed counters and state, null links.
+    fn dummy() -> Self {
+        Self {
+            _offset: 0,
+            _access_count: 0,
+            _prev: std::ptr::null_mut(),
+            _next: std::ptr::null_mut(),
+            _state: [0; 8],
+        }
+    }
+}
+
+// Utilities for applying operations.
+
+/// A single map operation: `Some(value)` inserts or overwrites the key, `None` removes it.
+#[derive(Clone, Debug)]
+struct TestOp<K, V>(K, Option<V>);
+
+/// Applies `op` to `map`.
+///
+/// Panics if inserting a new key fails.
+// NOTE(review): the generic parameter lists of `TestOp` and `apply_op` were missing from the
+// reviewed copy of this patch and are reconstructed here -- confirm the bounds against
+// `HashMapAccess`.
+fn apply_op<K: Clone + std::hash::Hash + Eq, V, S: std::hash::BuildHasher>(
+    op: TestOp<K, V>,
+    map: &mut HashMapAccess<K, V, S>,
+) {
+    let TestOp(key, value) = op;
+    let hash = map.get_hash_value(&key);
+    let entry = map.entry_with_hash(key, hash);
+
+    // The values displaced by an overwrite or a removal are not needed here.
+    match (entry, value) {
+        (Entry::Occupied(mut e), Some(new)) => {
+            let _ = e.insert(new);
+        }
+        (Entry::Vacant(e), Some(new)) => {
+            e.insert(new).unwrap();
+        }
+        (Entry::Occupied(e), None) => {
+            let _ = e.remove();
+        }
+        (Entry::Vacant(_), None) => {}
+    }
+}
+
+/// Inserts random keys until 16,000,000 buckets are in use (80% of the 20,000,000 requested),
+/// then renders a histogram of the chain lengths.
+#[cfg(feature = "stats")]
+fn main() {
+    let ideal_filled = 16_000_000;
+    let size = 20_000_000;
+    let mut writer = HashMapInit::new_resizeable(size, size).attach_writer();
+    let mut rng = rand::rng();
+    while writer.get_num_buckets_in_use() < ideal_filled as usize {
+        let key: FileCacheKey = rng.random();
+        let val = FileCacheEntry::dummy();
+        apply_op(TestOp(key, Some(val)), &mut writer);
+    }
+    println!("Inserted {ideal_filled} entries into a map with capacity {size}.");
+    let (distr, max) = writer.chain_distribution();
+
+    let root_area = BitMapBackend::new("chain_distr.png", (800, 400)).into_drawing_area();
+    root_area.fill(&WHITE).unwrap();
+
+    let mut ctx = ChartBuilder::on(&root_area)
+        .set_label_area_size(LabelAreaPosition::Left, 40)
+        .set_label_area_size(LabelAreaPosition::Bottom, 40)
+        .build_cartesian_2d((0..max).into_segmented(), (0..ideal_filled * 2).log_scale())
+        .unwrap();
+
+    ctx.configure_mesh()
+        .y_label_formatter(&|y| format!("{:e}", y))
+        .draw()
+        .unwrap();
+
+    // One histogram sample per dictionary slot, binned by that slot's chain length.
+    ctx.draw_series(
+        Histogram::vertical(&ctx)
+            .margin(10)
+            .data(distr.iter().map(|x| (x.1, 1))),
+    )
+    .unwrap();
+
+    // Write the image now so that an I/O failure is reported instead of being dropped.
+    root_area
+        .present()
+        .expect("failed to write chain_distr.png");
+}
+
+#[cfg(not(feature = "stats"))]
+fn main() {
+    println!("Enable the `stats` feature to use this binary!");
+}