diff --git a/Cargo.lock b/Cargo.lock index 952034a16b..7c74a00c2f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3973,6 +3973,7 @@ dependencies = [ "serde", "serde_json", "sha2", + "smallvec", "smol_str", "socket2 0.5.5", "sync_wrapper", @@ -5107,6 +5108,9 @@ name = "smallvec" version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" +dependencies = [ + "serde", +] [[package]] name = "smol_str" diff --git a/proxy/Cargo.toml b/proxy/Cargo.toml index f075c718a7..dc2aa93975 100644 --- a/proxy/Cargo.toml +++ b/proxy/Cargo.toml @@ -81,6 +81,7 @@ postgres-native-tls.workspace = true postgres-protocol.workspace = true redis.workspace = true smol_str.workspace = true +smallvec = { workspace = true, features = ["serde"] } workspace_hack.workspace = true diff --git a/proxy/src/cache/project_info.rs b/proxy/src/cache/project_info.rs index 6dea1c8f3f..794e95d84c 100644 --- a/proxy/src/cache/project_info.rs +++ b/proxy/src/cache/project_info.rs @@ -1,5 +1,4 @@ use std::{ - collections::HashSet, sync::{atomic::AtomicU64, Arc}, time::Duration, }; @@ -7,6 +6,7 @@ use std::{ use dashmap::DashMap; use hashlink::LruCache; use parking_lot::Mutex; +use smallvec::SmallVec; use smol_str::SmolStr; use tokio::time::Instant; use tracing::info; @@ -60,7 +60,27 @@ pub struct ProjectInfoCacheImpl { ip_cache: Mutex>>>>, role_cache: Mutex>>, - project2ep: DashMap>, + // endpoints per project: + // P90: 1 + // P99: 2 + // P995: 3 + // P999: 10 + // P9999: 186 + // + // Assuming 1 million projects with this distribution: + // (0.9 * 1 + 0.09 * 2 + 0.005 * 3 + 0.004 * 10 + 0.0009 * 186) * 1,000,000 + // =~ 1,500,000 endpoints + // + // 1,000,000 * size_of(SmolStr) = 24MB + // 1,500,000 * size_of(SmolStr) = 36MB + // SmallVec inline overhead: 8B * 0.9 * 1,000,000 = 7.2MB + // SmallVec outline overhead: 32B * 0.1 * 1,000,000 = 3.2MB + // + // Total size: 70.4MB. 
+ // + // We do not need to prune this hashmap and can safely + // keep it in memory up until 100s of millions of projects + project2ep: DashMap<SmolStr, SmallVec<[SmolStr; 1]>>, start_time: Instant, ttl: Duration, @@ -204,7 +224,7 @@ impl ProjectInfoCacheImpl { self.project2ep .entry(project_id.clone()) .or_default() - .insert(endpoint_id.clone()); + .push(endpoint_id.clone()); } fn get_cache_times(&self) -> (Instant, Option) { diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index dbd46054a4..9fe76d3dad 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -66,7 +66,7 @@ rustls = { version = "0.21", features = ["dangerous_configuration"] } scopeguard = { version = "1" } serde = { version = "1", features = ["alloc", "derive"] } serde_json = { version = "1", features = ["raw_value"] } -smallvec = { version = "1", default-features = false, features = ["write"] } +smallvec = { version = "1", default-features = false, features = ["serde", "write"] } subtle = { version = "2" } time = { version = "0.3", features = ["local-offset", "macros", "serde-well-known"] } tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "net", "process", "rt-multi-thread", "signal", "test-util"] }