diff --git a/Cargo.lock b/Cargo.lock index e6b0972794..669fb0eac7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1312,7 +1312,9 @@ dependencies = [ "bytes", "cbindgen", "clashmap", + "fiemap", "http 1.3.1", + "itertools 0.10.5", "libc", "measured", "metrics", @@ -2336,6 +2338,15 @@ version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d" +[[package]] +name = "fiemap" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92e387bc8b3342ba5cd115fb566e6bf2c82562433dffcecbc2474265cf8a875a" +dependencies = [ + "bitflags 2.8.0", +] + [[package]] name = "filetime" version = "0.2.22" @@ -3822,7 +3833,7 @@ dependencies = [ "prometheus", "rand 0.9.1", "rand_distr", - "twox-hash 1.6.3", + "twox-hash", ] [[package]] @@ -9045,12 +9056,6 @@ version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4d25c75bf9ea12c4040a97f829154768bbbce366287e2dc044af160cd79a13fd" -[[package]] -name = "xxhash-rust" -version = "0.8.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" - [[package]] name = "yansi" version = "1.0.1" diff --git a/pgxn/neon/communicator/Cargo.toml b/pgxn/neon/communicator/Cargo.toml index 822d4f749e..0ea13dce04 100644 --- a/pgxn/neon/communicator/Cargo.toml +++ b/pgxn/neon/communicator/Cargo.toml @@ -45,5 +45,9 @@ neon-shmem.workspace = true utils.workspace = true workspace_hack = { version = "0.1", path = "../../../workspace_hack" } +[target.'cfg(target_os = "linux")'.dependencies] +fiemap = "0.1.3" +itertools.workspace = true + [build-dependencies] cbindgen.workspace = true diff --git a/pgxn/neon/communicator/src/file_cache.rs b/pgxn/neon/communicator/src/file_cache.rs index 88141c6798..60cb1f3cd9 100644 --- a/pgxn/neon/communicator/src/file_cache.rs +++ 
b/pgxn/neon/communicator/src/file_cache.rs @@ -10,7 +10,7 @@ //! write it? ), but the backends use direct C library calls for that. use std::fs::File; use std::os::unix::fs::FileExt; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::sync::Arc; use std::sync::Mutex; @@ -25,13 +25,11 @@ pub const INVALID_CACHE_BLOCK: CacheBlock = u64::MAX; #[derive(Debug)] pub struct FileCache { file: Arc, - free_list: Mutex, - // NOTE(quantumish): when we punch holes in the LFC to shrink the file size, - // we *shouldn't* add them to the free list (since that's used to write new entries) - // but we still should remember them so that we can add them to the free list when - // a growth later occurs. this still has the same scalability flaws as the freelist - hole_list: Mutex>, + + // The `fiemap-rs` library doesn't expose any way to issue a FIEMAP ioctl + // on an existing file descriptor, so we have to save the path. + path: PathBuf, // metrics max_blocks_gauge: metrics::IntGauge, @@ -86,7 +84,7 @@ impl FileCache { max_blocks: initial_size, free_blocks: Vec::new(), }), - hole_list: Mutex::new(Vec::new()), + path: file_cache_path.to_path_buf(), max_blocks_gauge, num_free_blocks_gauge, }) @@ -157,24 +155,57 @@ impl FileCache { tracing::error!("failed to punch hole in LFC at block {cache_block}: {e}"); return; } - - let mut hole_list = self.hole_list.lock().unwrap(); - hole_list.push(cache_block); } /// Attempt to reclaim `num_blocks` of previously hole-punched blocks. - // TODO(quantumish): could probably just be merged w/ grow() - is there ever a reason - // to call this separately? + #[cfg(target_os = "linux")] pub fn undelete_blocks(&self, num_blocks: u64) -> u64 { - // Safety: nothing else should ever need to take both of these locks at once. - // TODO(quantumish): may just be worth putting both under the same lock. 
- let mut hole_list = self.hole_list.lock().unwrap(); + use itertools::Itertools; + let mut pushed = 0; let mut free_list = self.free_list.lock().unwrap(); - let amt = hole_list.len().min(num_blocks as usize); - for _ in 0..amt { - free_list.free_blocks.push(hole_list.pop().unwrap()); + let res = fiemap::fiemap(self.path.as_path()).unwrap(); + for (prev, cur) in res.map(|x| x.unwrap()).tuple_windows() { + if (prev.fe_logical + prev.fe_length) < cur.fe_logical { + let mut end = prev.fe_logical + prev.fe_length; + while end < cur.fe_logical { + free_list.free_blocks.push(end); + pushed += 1; + if pushed == num_blocks { + return 0; + } + end += BLCKSZ as u64; + } + } } - amt as u64 + num_blocks - pushed + } + + /// Attempt to reclaim `num_blocks` of previously hole-punched blocks. + // FIXME(quantumish): local tests showed this code has some buggy behavior. + #[cfg(target_os = "macos")] + pub fn undelete_blocks(&self, num_blocks: u64) -> u64 { + use nix::unistd as nix; + let mut free_list = self.free_list.lock().unwrap(); + let num_bytes = (free_list.next_free_block * BLOCKSZ) as i64; + let mut cur_pos = 0; + let mut pushed = 0; + while cur_pos < num_bytes { + let res = nix::lseek( + file.clone(), + cur_pos, + nix::Whence::SeekHole + ).unwrap(); + if res >= num_bytes { + break; + } + free_list.free_blocks.push(res); + pushed += 1; + if pushed == num_blocks { + return 0; + } + cur_pos = res + BLOCKSZ as i64; + } + num_blocks - pushed } /// Physically grows the file and expands the freelist.