diff --git a/libs/alloc-metrics/benches/alloc.rs b/libs/alloc-metrics/benches/alloc.rs index 75e7822be1..85f5501c38 100644 --- a/libs/alloc-metrics/benches/alloc.rs +++ b/libs/alloc-metrics/benches/alloc.rs @@ -2,14 +2,86 @@ use std::alloc::{GlobalAlloc, Layout, System, handle_alloc_error}; use alloc_metrics::TrackedAllocator; use criterion::{ - AxisScale, BatchSize, BenchmarkId as Id, Criterion, PlotConfiguration, Throughput, - criterion_group, criterion_main, + AxisScale, BenchmarkGroup, BenchmarkId, Criterion, PlotConfiguration, measurement::Measurement, }; use measured::FixedCardinalityLabel; use tikv_jemallocator::Jemalloc; -criterion_group!(benches, bench_alloc); -criterion_main!(benches); +fn main() { + let mut c = Criterion::default().configure_from_args(); + bench(&mut c); + c.final_summary(); +} + +#[rustfmt::skip] +fn bench(c: &mut Criterion) { + bench_alloc(c.benchmark_group("alloc/system"), &System, &ALLOC_SYSTEM); + bench_alloc(c.benchmark_group("alloc/jemalloc"), &Jemalloc, &ALLOC_JEMALLOC); + + bench_dealloc(c.benchmark_group("dealloc/system"), &System, &ALLOC_SYSTEM); + bench_dealloc(c.benchmark_group("dealloc/jemalloc"), &Jemalloc, &ALLOC_JEMALLOC); +} + +#[derive(FixedCardinalityLabel, Clone, Copy, Debug)] +#[label(singleton = "memory_context")] +pub enum MemoryContext { + Root, + Test, +} + +static ALLOC_SYSTEM: TrackedAllocator = + unsafe { TrackedAllocator::new(System, MemoryContext::Root) }; +static ALLOC_JEMALLOC: TrackedAllocator = + unsafe { TrackedAllocator::new(Jemalloc, MemoryContext::Root) }; + +const KB: u64 = 1024; +const SIZES: [u64; 6] = [64, 256, KB, 4 * KB, 16 * KB, KB * KB]; + +fn bench_alloc( + mut g: BenchmarkGroup<'_, impl Measurement>, + alloc1: &'static A, + alloc2: &'static TrackedAllocator, +) { + g.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); + for size in SIZES { + let layout = Layout::from_size_align(size as usize, 8).unwrap(); + + g.throughput(criterion::Throughput::Bytes(size)); + g.bench_with_input(BenchmarkId::new("default", size), &layout, |b, &layout| { + let bs = criterion::BatchSize::NumBatches(10 + size.ilog2() as u64); + b.iter_batched(|| {}, |()| Alloc::new(alloc1, layout), bs); + }); + g.bench_with_input(BenchmarkId::new("tracked", size), &layout, |b, &layout| { + let _scope = alloc2.scope(MemoryContext::Test); + + let bs = criterion::BatchSize::NumBatches(10 + size.ilog2() as u64); + b.iter_batched(|| {}, |()| Alloc::new(alloc2, layout), bs); + }); + } +} + +fn bench_dealloc( + mut g: BenchmarkGroup<'_, impl Measurement>, + alloc1: &'static A, + alloc2: &'static TrackedAllocator, +) { + g.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); + for size in SIZES { + let layout = Layout::from_size_align(size as usize, 8).unwrap(); + + g.throughput(criterion::Throughput::Bytes(size)); + g.bench_with_input(BenchmarkId::new("default", size), &layout, |b, &layout| { + let bs = criterion::BatchSize::NumBatches(10 + size.ilog2() as u64); + b.iter_batched(|| Alloc::new(alloc1, layout), drop, bs); + }); + g.bench_with_input(BenchmarkId::new("tracked", size), &layout, |b, &layout| { + let _scope = alloc2.scope(MemoryContext::Test); + + let bs = criterion::BatchSize::NumBatches(10 + size.ilog2() as u64); + b.iter_batched(|| Alloc::new(alloc2, layout), drop, bs); + }); + } +} struct Alloc<'a, A: GlobalAlloc> { alloc: &'a A, @@ -36,72 +108,3 @@ impl<'a, A: GlobalAlloc> Drop for Alloc<'a, A> { unsafe { self.alloc.dealloc(self.ptr, self.layout) }; } } - -#[derive(FixedCardinalityLabel, Clone, Copy, Debug)] -#[label(singleton = "memory_context")] -pub enum MemoryContext { - Root, - Test, -} - -static ALLOC_SYSTEM: TrackedAllocator = - unsafe { TrackedAllocator::new(System, MemoryContext::Root) }; -static ALLOC_JEMALLOC: TrackedAllocator = - unsafe { TrackedAllocator::new(Jemalloc, MemoryContext::Root) }; - -fn bench_alloc(c: &mut Criterion) { - const KB: u64 = 1024; - let sizes = [64, 256, KB, 4 * KB, 16 * KB, KB * KB]; - - let mut g = c.benchmark_group("alloc"); - g.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); - for size in sizes { - g.throughput(Throughput::Bytes(size)); - - let layout = Layout::from_size_align(size as usize, 8).unwrap(); - - let bs = BatchSize::NumBatches(10 + size.ilog2() as u64); - - g.bench_with_input(Id::new("system", size), &layout, |b, layout| { - b.iter_batched(|| {}, |()| Alloc::new(&System, *layout), bs); - }); - g.bench_with_input(Id::new("tracked[system]", size), &layout, |b, layout| { - let _scope = ALLOC_SYSTEM.scope(MemoryContext::Test); - b.iter_batched(|| {}, |()| Alloc::new(&ALLOC_SYSTEM, *layout), bs); - }); - g.bench_with_input(Id::new("jemalloc", size), &layout, |b, layout| { - b.iter_batched(|| {}, |()| Alloc::new(&Jemalloc, *layout), bs); - }); - g.bench_with_input(Id::new("tracked[jemalloc]", size), &layout, |b, layout| { - let _scope = ALLOC_JEMALLOC.scope(MemoryContext::Test); - b.iter_batched(|| {}, |()| Alloc::new(&ALLOC_JEMALLOC, *layout), bs); - }); - } - g.finish(); - - let mut g = c.benchmark_group("dealloc"); - g.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); - for size in sizes { - g.throughput(Throughput::Bytes(size)); - - let layout = Layout::from_size_align(size as usize, 8).unwrap(); - - let bs = BatchSize::NumBatches(10 + size.ilog2() as u64); - - g.bench_with_input(Id::new("system", size), &layout, |b, layout| { - b.iter_batched(|| Alloc::new(&System, *layout), drop, bs) - }); - g.bench_with_input(Id::new("tracked[system]", size), &layout, |b, layout| { - let _scope = ALLOC_SYSTEM.scope(MemoryContext::Test); - b.iter_batched(|| Alloc::new(&ALLOC_SYSTEM, *layout), drop, bs) - }); - g.bench_with_input(Id::new("jemalloc", size), &layout, |b, layout| { - b.iter_batched(|| Alloc::new(&Jemalloc, *layout), drop, bs) - }); - g.bench_with_input(Id::new("tracked[jemalloc]", size), &layout, |b, layout| { - let _scope = ALLOC_JEMALLOC.scope(MemoryContext::Test); - b.iter_batched(|| Alloc::new(&ALLOC_JEMALLOC, *layout), drop, bs) - }); - } - g.finish(); -} diff --git a/libs/alloc-metrics/src/counters.rs b/libs/alloc-metrics/src/counters.rs index 852a83b49f..19dda08393 100644 --- a/libs/alloc-metrics/src/counters.rs +++ b/libs/alloc-metrics/src/counters.rs @@ -16,9 +16,9 @@ pub struct DenseCounterPairVec< } impl>, L: FixedCardinalityLabel + LabelGroup> - Default for DenseCounterPairVec + DenseCounterPairVec { - fn default() -> Self { + pub fn new() -> Self { Self { vec: DenseMetricVec::new(), _marker: PhantomData, diff --git a/libs/alloc-metrics/src/lib.rs b/libs/alloc-metrics/src/lib.rs index d524c7ddfd..8f53e95300 100644 --- a/libs/alloc-metrics/src/lib.rs +++ b/libs/alloc-metrics/src/lib.rs @@ -31,10 +31,7 @@ pub struct TrackedAllocator>>, - /// per thread state containing low contention counters for faster allocations. - thread_state: OnceLock>>, + thread: OnceLock>, /// where thread alloc data is eventually saved to, even if threads are shutdown. global: OnceLock>, @@ -59,8 +56,7 @@ where count: AtomicU64::new(0), }, }, - thread_scope: OnceLock::new(), - thread_state: OnceLock::new(), + thread: OnceLock::new(), global: OnceLock::new(), } } @@ -77,73 +73,71 @@ where } fn register_thread_inner(&'static self) -> &'static Cell { - self.thread_state - .get_or_init(ThreadLocal::new) - .get_or(|| ThreadState { - counters: AllocCounter::default(), - global: self.global.get_or_init(AllocCounter::default), - }); + let thread = self.thread.get_or_init(|| RegisteredThread { + scope: ThreadLocal::new(), + state: ThreadLocal::new(), + }); - self.thread_scope - .get_or_init(ThreadLocal::new) - .get_or(|| Cell::new(self.default_tag)) + thread.state.get_or(|| ThreadState { + counters: AllocCounter::new(), + global: self.global.get_or_init(AllocCounter::new), + }); + + thread.scope.get_or(|| Cell::new(self.default_tag)) } fn current_counters_alloc_safe(&self) -> Option<&AllocCounter> { // We are being very careful here to not allocate or panic. - self.thread_state + self.thread .get() - .and_then(ThreadLocal::get) + .and_then(|s| s.state.get()) .map(|s| &s.counters) .or_else(|| self.global.get()) } fn current_tag_alloc_safe(&self) -> T { // We are being very careful here to not allocate or panic. - self.thread_scope + self.thread .get() - .and_then(ThreadLocal::get) + .and_then(|s| s.scope.get()) .map_or(self.default_tag, Cell::get) } } -impl TrackedAllocator -where - T: 'static + Send + Sync + FixedCardinalityLabel + LabelGroup, -{ - unsafe fn alloc_inner(&self, layout: Layout, alloc: impl FnOnce(Layout) -> *mut u8) -> *mut u8 { - let Ok((tagged_layout, tag_offset)) = layout.extend(Layout::new::()) else { - return std::ptr::null_mut(); - }; - let tagged_layout = tagged_layout.pad_to_align(); +macro_rules! alloc { + ($alloc_fn:ident) => { + unsafe fn $alloc_fn(&self, layout: Layout) -> *mut u8 { + let Ok((tagged_layout, tag_offset)) = layout.extend(Layout::new::()) else { + return std::ptr::null_mut(); + }; + let tagged_layout = tagged_layout.pad_to_align(); - // Safety: The layout is not zero-sized. - let ptr = alloc(tagged_layout); + // Safety: The layout is not zero-sized. + let ptr = unsafe { self.inner.$alloc_fn(tagged_layout) }; - // allocation failed. - if ptr.is_null() { - return ptr; + // allocation failed. + if ptr.is_null() { + return ptr; + } + + let tag = self.current_tag_alloc_safe(); + + // Allocation successful. Write our tag + // Safety: tag_offset is inbounds of the ptr + unsafe { ptr.add(tag_offset).cast::().write(tag) } + + let metric = if let Some(counters) = self.current_counters_alloc_safe() { + counters.vec.get_metric(tag) + } else { + // if tag is not default, then global would have been registered, therefore tag must be default. + &self.default_counters + }; + + metric.inc.count.fetch_add(layout.size() as u64, Relaxed); + + ptr } - - let tag = self.current_tag_alloc_safe(); - - // Allocation successful. Write our tag - // Safety: tag_offset is inbounds of the ptr - unsafe { ptr.add(tag_offset).cast::().write(tag) } - - let metric = if let Some(counters) = self.current_counters_alloc_safe() { - // safety: caller ensured that is implemented correctly. - let id = unsafe { counters.vec.try_with_labels(tag).unwrap_unchecked() }; - counters.vec.get_metric(id) - } else { - // if tag is not default, then global would have been registered, therefore tag must be default. - &self.default_counters - }; - - metric.inc_by(layout.size() as u64); - - ptr - } + }; } // We will tag our allocation by adding `T` to the end of the layout. @@ -156,19 +150,8 @@ where A: GlobalAlloc, T: 'static + Send + Sync + FixedCardinalityLabel + LabelGroup, { - unsafe fn alloc(&self, layout: Layout) -> *mut u8 { - // safety: same as caller - unsafe { self.alloc_inner(layout, |tagged_layout| self.inner.alloc(tagged_layout)) } - } - - unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 { - // safety: same as caller - unsafe { - self.alloc_inner(layout, |tagged_layout| { - self.inner.alloc_zeroed(tagged_layout) - }) - } - } + alloc!(alloc); + alloc!(alloc_zeroed); unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 { // SAFETY: the caller must ensure that the `new_size` does not overflow. @@ -209,12 +192,8 @@ where unsafe { new_ptr.add(new_tag_offset).cast::().write(new_tag) } let (new_metric, old_metric) = if let Some(counters) = self.current_counters_alloc_safe() { - // safety: caller ensured that is implemented correctly. - let new_id = unsafe { counters.vec.try_with_labels(new_tag).unwrap_unchecked() }; - // safety: caller ensured that is implemented correctly. - let old_id = unsafe { counters.vec.try_with_labels(tag).unwrap_unchecked() }; - let new_metric = counters.vec.get_metric(new_id); - let old_metric = counters.vec.get_metric(old_id); + let new_metric = counters.vec.get_metric(new_tag); + let old_metric = counters.vec.get_metric(tag); (new_metric, old_metric) } else { @@ -230,8 +209,8 @@ where (0, (layout.size() - new_layout.size()) as u64) }; - new_metric.inc.inc_by(inc); - old_metric.dec.inc_by(dec); + new_metric.inc.count.fetch_add(inc, Relaxed); + old_metric.dec.count.fetch_add(dec, Relaxed); new_ptr } @@ -252,15 +231,13 @@ where unsafe { self.inner.dealloc(ptr, tagged_layout) } let metric = if let Some(counters) = self.current_counters_alloc_safe() { - // safety: caller ensured that is implemented correctly. - let id = unsafe { counters.vec.try_with_labels(tag).unwrap_unchecked() }; - counters.vec.get_metric(id) + counters.vec.get_metric(tag) } else { // if tag is not default, then global would have been registered, therefore tag must be default. &self.default_counters }; - metric.dec_by(layout.size() as u64); + metric.dec.count.fetch_add(layout.size() as u64, Relaxed); } } @@ -287,6 +264,13 @@ impl CounterPairAssoc for AllocPair { type LabelGroupSet = StaticLabelSet; } +struct RegisteredThread { + /// Current memory context for this thread. + scope: ThreadLocal>, + /// per thread state containing low contention counters for faster allocations. + state: ThreadLocal>, +} + struct ThreadState { counters: AllocCounter, global: &'static AllocCounter, @@ -298,14 +282,12 @@ impl Drop for ThreadState { // iterate over all labels for tag in (0..T::cardinality()).map(T::decode) { // load and reset the counts in the thread-local counters. - let id = self.counters.vec.with_labels(tag); - let m = self.counters.vec.get_metric_mut(id); + let m = self.counters.vec.get_metric_mut(tag); let inc = *m.inc.count.get_mut(); let dec = *m.dec.count.get_mut(); // add the counts into the global counters. - let id = self.global.vec.with_labels(tag); - let m = self.global.vec.get_metric(id); + let m = self.global.vec.get_metric(tag); m.inc.count.fetch_add(inc, Relaxed); m.dec.count.fetch_add(dec, Relaxed); } @@ -319,14 +301,13 @@ where CounterState: MetricEncoding, { fn collect_group_into(&self, enc: &mut Enc) -> Result<(), Enc::Err> { - let global = self.global.get_or_init(AllocCounter::default); + let global = self.global.get_or_init(AllocCounter::new); // iterate over all counter threads - for s in self.thread_state.get().into_iter().flat_map(|s| s.iter()) { + for s in self.thread.get().into_iter().flat_map(|s| s.state.iter()) { // iterate over all labels for tag in (0..T::cardinality()).map(T::decode) { - let id = s.counters.vec.with_labels(tag); - sample(global, s.counters.vec.get_metric(id), tag); + sample(global, s.counters.vec.get_metric(tag), tag); } } @@ -346,8 +327,7 @@ fn sample( let dec = local.dec.count.swap(0, Relaxed); // add the counts into the global counters. - let id = global.vec.with_labels(tag); - let m = global.vec.get_metric(id); + let m = global.vec.get_metric(tag); m.inc.count.fetch_add(inc, Relaxed); m.dec.count.fetch_add(dec, Relaxed); } diff --git a/libs/alloc-metrics/src/metric_vec.rs b/libs/alloc-metrics/src/metric_vec.rs index fe8cd84826..c210559be0 100644 --- a/libs/alloc-metrics/src/metric_vec.rs +++ b/libs/alloc-metrics/src/metric_vec.rs @@ -2,20 +2,16 @@ use measured::{ FixedCardinalityLabel, LabelGroup, - label::{LabelGroupSet, StaticLabelSet}, + label::StaticLabelSet, metric::{ MetricEncoding, MetricFamilyEncoding, MetricType, group::Encoding, name::MetricNameEncoder, }, }; pub struct DenseMetricVec { - metrics: VecInner, + metrics: Box<[M]>, metadata: M::Metadata, - label_set: StaticLabelSet, -} - -enum VecInner { - Dense(Box<[M]>), + _label_set: StaticLabelSet, } fn new_dense(c: usize) -> Box<[M]> { @@ -34,72 +30,32 @@ where } } -impl Default for DenseMetricVec -where - M::Metadata: Default, -{ - fn default() -> Self { - Self::new() - } -} - -impl VecInner { - fn get_metric(&self, id: usize) -> &M { - match self { - VecInner::Dense(metrics) => &metrics[id], - } - } - - fn get_metric_mut(&mut self, id: usize) -> &mut M { - match self { - VecInner::Dense(metrics) => &mut metrics[id], - } - } -} - impl DenseMetricVec { /// Create a new metric vec with the given label set and metric metadata pub fn with_metadata(metadata: M::Metadata) -> Self { - let metrics = VecInner::Dense(new_dense(L::cardinality())); - Self { - metrics, + metrics: new_dense(L::cardinality()), metadata, - label_set: StaticLabelSet::new(), + _label_set: StaticLabelSet::new(), } } - /// Get an identifier for the specific metric identified by this label group + /// Get the individual metric at the given identifier. /// /// # Panics - /// Panics if the label group is not contained within the label set. - pub fn with_labels(&self, label: L) -> usize { - self.try_with_labels(label) - .expect("label group was not contained within this label set") - } - - /// Get an identifier for the specific metric identified by this label group - /// - /// # Errors - /// Returns None if the label group is not contained within the label set. - pub fn try_with_labels(&self, label: L) -> Option { - self.label_set.encode(label) + /// Can panic or cause strange behaviour if the label ID comes from a different metric family. + pub fn get_metric(&self, label: L) -> &M { + // safety: The caller has guarantees that the label encoding is valid. + unsafe { self.metrics.get_unchecked(label.encode()) } } /// Get the individual metric at the given identifier. /// /// # Panics /// Can panic or cause strange behaviour if the label ID comes from a different metric family. - pub fn get_metric(&self, id: usize) -> &M { - self.metrics.get_metric(id) - } - - /// Get the individual metric at the given identifier. - /// - /// # Panics - /// Can panic or cause strange behaviour if the label ID comes from a different metric family. - pub fn get_metric_mut(&mut self, id: usize) -> &mut M { - self.metrics.get_metric_mut(id) + pub fn get_metric_mut(&mut self, label: L) -> &mut M { + // safety: The caller has guarantees that the label encoding is valid. + unsafe { self.metrics.get_unchecked_mut(label.encode()) } } } @@ -108,17 +64,8 @@ impl, L: FixedCardinalityLabel + LabelGroup, T: Encoding> { fn collect_family_into(&self, name: impl MetricNameEncoder, enc: &mut T) -> Result<(), T::Err> { M::write_type(&name, enc)?; - match &self.metrics { - VecInner::Dense(m) => { - for (index, value) in m.iter().enumerate() { - value.collect_into( - &self.metadata, - self.label_set.decode_dense(index), - &name, - enc, - )?; - } - } + for (index, value) in self.metrics.iter().enumerate() { + value.collect_into(&self.metadata, L::decode(index), &name, enc)?; } Ok(()) }