use std::{alloc::*, fmt, ptr::NonNull};
use cfg_if::cfg_if;
use crate::stats::StatsSet;
#[cfg(target_os = "macos")]
use crate::util::sync::{CachePadded, PThreadKey};
#[cfg(not(target_os = "macos"))]
use std::cell::UnsafeCell;
/// Installs [`AllocProfiler`] (wrapping the system allocator) as the global
/// allocator when this crate is built for its own tests, so that the unit tests
/// in this file can observe the tallies it records.
#[cfg(test)]
#[global_allocator]
static ALLOC: AllocProfiler = AllocProfiler::system();
/// Allocator wrapper that tallies allocation operations on the calling thread
/// and then forwards each request to the wrapped allocator.
///
/// The `Alloc` parameter defaults to [`System`].
#[derive(Debug, Default)]
pub struct AllocProfiler<Alloc = System> {
// The allocator that actually services every request.
alloc: Alloc,
}
unsafe impl<A: GlobalAlloc> GlobalAlloc for AllocProfiler<A> {
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
if let Some(mut info) = ThreadAllocInfo::try_current() {
let info = unsafe { info.as_mut() };
info.tally(AllocOp::Alloc, layout.size());
};
self.alloc.alloc(layout)
}
unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
if let Some(mut info) = ThreadAllocInfo::try_current() {
let info = unsafe { info.as_mut() };
info.tally(AllocOp::Alloc, layout.size());
};
self.alloc.alloc_zeroed(layout)
}
unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
if let Some(mut info) = ThreadAllocInfo::try_current() {
let info = unsafe { info.as_mut() };
let shrink = new_size < layout.size();
info.tally(
AllocOp::realloc(shrink),
if shrink { layout.size() - new_size } else { new_size - layout.size() },
);
};
self.alloc.realloc(ptr, layout, new_size)
}
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
if let Some(mut info) = ThreadAllocInfo::try_current() {
let info = unsafe { info.as_mut() };
info.tally(AllocOp::Dealloc, layout.size());
};
self.alloc.dealloc(ptr, layout)
}
}
impl AllocProfiler {
    /// Creates a profiler that forwards every request to the [`System`]
    /// allocator.
    ///
    /// Usable in `const` contexts, e.g. to initialize a `#[global_allocator]`
    /// static.
    #[inline]
    pub const fn system() -> Self {
        Self { alloc: System }
    }
}
impl<A> AllocProfiler<A> {
#[inline]
pub const fn new(alloc: A) -> Self {
Self { alloc }
}
}
/// Allocation bookkeeping kept separately for each thread.
#[derive(Default)]
pub(crate) struct ThreadAllocInfo {
/// Count and byte totals for each kind of allocator operation.
pub tallies: ThreadAllocTallyMap,
}
// Non-macOS storage for the per-thread info: a const-initialized thread-local.
// It is wrapped in `UnsafeCell` so that `ThreadAllocInfo::try_current` can
// hand out a mutable pointer to it.
#[cfg(not(target_os = "macos"))]
thread_local! {
static CURRENT_THREAD_INFO: UnsafeCell<ThreadAllocInfo> = const {
UnsafeCell::new(ThreadAllocInfo::new())
};
}
/// macOS storage for the per-thread info: a pthread key whose slot holds a
/// pointer to a `ThreadAllocInfo` allocated by `ThreadAllocInfo::current`.
///
/// NOTE(review): `CachePadded` presumably keeps the key on its own cache line —
/// confirm against `crate::util::sync`.
#[cfg(target_os = "macos")]
static ALLOC_PTHREAD_KEY: CachePadded<PThreadKey<ThreadAllocInfo>> = CachePadded(PThreadKey::new());
impl ThreadAllocInfo {
/// Creates an info value whose tallies are all zero.
///
/// Only compiled on non-macOS targets, where it initializes the
/// `CURRENT_THREAD_INFO` thread-local.
#[inline]
#[cfg(not(target_os = "macos"))]
pub const fn new() -> Self {
Self { tallies: ThreadAllocTallyMap::new() }
}
/// Returns a pointer to the calling thread's info, creating it first on macOS
/// if it does not exist yet.
///
/// On non-macOS targets this is the same as [`Self::try_current`]. Returns
/// `None` when the info is unavailable (and, on macOS, could not be
/// registered with the pthread key).
#[inline]
pub fn current() -> Option<NonNull<Self>> {
cfg_if! {
if #[cfg(target_os = "macos")] {
// Try the cheap lookup first; allocate and register only when it is empty.
return Self::try_current().or_else(slow_impl);
} else {
// The thread-local needs no explicit creation step here.
Self::try_current()
}
}
// macOS-only slow path, kept out of line (`#[cold]`, `#[inline(never)]`):
// allocates a zeroed `ThreadAllocInfo` straight from `System` and stores the
// pointer in `ALLOC_PTHREAD_KEY`.
#[cfg(target_os = "macos")]
#[cold]
#[inline(never)]
fn slow_impl() -> Option<NonNull<ThreadAllocInfo>> {
unsafe {
let layout = Layout::new::<ThreadAllocInfo>();
// Allocation failure is fatal: `handle_alloc_error` does not return.
let Some(info_alloc) = NonNull::new(unsafe { System.alloc_zeroed(layout) }) else {
handle_alloc_error(layout);
};
// Store the pointer under the key. The closure frees the allocation with the
// same layout; presumably it is run as the key's destructor when the thread
// exits — confirm against `PThreadKey::set`.
let success = ALLOC_PTHREAD_KEY.0.set(info_alloc.as_ptr().cast(), |this| {
System.dealloc(this.as_ptr().cast(), Layout::new::<ThreadAllocInfo>());
});
// Registration failed: release the allocation so it does not leak.
if !success {
System.dealloc(info_alloc.as_ptr(), layout);
return None;
}
// In the configuration where `try_current` reads through
// `fast::get_static_thread_local`, also publish the pointer through the
// matching setter. NOTE(review): why `set` above is not sufficient in this
// configuration is not visible from this file — confirm in
// `crate::util::sync::fast`.
#[cfg(all(not(miri), not(feature = "dyn_thread_local"), target_arch = "x86_64"))]
unsafe {
crate::util::sync::fast::set_static_thread_local(info_alloc.as_ptr());
};
Some(info_alloc.cast())
}
}
}
/// Returns a pointer to the calling thread's info if it is available, without
/// ever creating it.
///
/// This is the lookup used by the `GlobalAlloc` implementation on every
/// allocator call.
#[inline]
pub fn try_current() -> Option<NonNull<Self>> {
cfg_if! {
if #[cfg(target_os = "macos")] {
// Compiled only for x86_64 builds without Miri and without the
// `dyn_thread_local` feature: read the pointer through the `fast` helper
// and return early; a null pointer becomes `None`.
#[cfg(all(
not(miri),
not(feature = "dyn_thread_local"),
target_arch = "x86_64",
))]
return NonNull::new(unsafe {
crate::util::sync::fast::get_static_thread_local::<Self>().cast_mut()
});
// Fallback lookup through the pthread key. When the early `return` above is
// compiled in, this line is unreachable, hence the `allow`.
#[allow(unreachable_code)]
ALLOC_PTHREAD_KEY.0.get()
} else {
// `try_with` returns an error once the thread-local is being or has been
// destroyed; `.ok()` turns that into `None`. `UnsafeCell::get` never
// returns null, so `new_unchecked` is sound.
CURRENT_THREAD_INFO.try_with(|info| unsafe {
NonNull::new_unchecked(info.get())
}).ok()
}
}
}
/// Resets the count and size of every tally to zero.
pub fn clear(&mut self) {
for tally in &mut self.tallies.values {
tally.count = 0;
tally.size = 0;
}
}
/// Records a single `op` that involved `size` bytes.
#[inline]
fn tally(&mut self, op: AllocOp, size: usize) {
self.tally_n(op, 1, size);
}
/// Adds `count` operations and `size` bytes to the tally for `op`.
#[inline]
fn tally_n(&mut self, op: AllocOp, count: usize, size: usize) {
let tally = self.tallies.get_mut(op);
tally.count += count as ThreadAllocCount;
tally.size += size as ThreadAllocCount;
}
}
/// A pair of running totals for one kind of allocator operation.
///
/// `#[repr(C)]` fixes the field order; `as_array` relies on it to view the two
/// fields as a `[Count; 2]`. The type is aligned to 16 bytes.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[repr(C, align(16))]
pub(crate) struct AllocTally<Count> {
/// Number of operations.
pub count: Count,
/// Total size involved in those operations (bytes, as passed by the allocator
/// hooks; for reallocations, the size difference).
pub size: Count,
}
/// Integer type used for per-thread tallies.
///
/// NOTE(review): presumably an unsigned integer at least 64 bits wide, chosen
/// by `condtype` — confirm against that crate's documentation.
pub(crate) type ThreadAllocCount = condtype::num::Usize64;
/// Tally as accumulated on a single thread.
pub(crate) type ThreadAllocTally = AllocTally<ThreadAllocCount>;
/// Tally wide enough to sum per-thread tallies (see
/// `ThreadAllocTallyMap::add_to_total`).
pub(crate) type TotalAllocTally = AllocTally<u128>;
impl AllocTally<StatsSet<f64>> {
    /// Returns `true` when both the count statistics and the size statistics
    /// report zero.
    pub fn is_zero(&self) -> bool {
        let Self { count, size } = self;
        count.is_zero() && size.is_zero()
    }
}
impl<C> AllocTally<C> {
    /// Views the tally as `[count, size]`.
    #[inline]
    pub fn as_array(&self) -> &[C; 2] {
        let pair = self as *const Self as *const [C; 2];

        // SAFETY: `AllocTally` is `#[repr(C)]` with exactly two fields of type
        // `C`, so `count` and `size` sit back to back at the start of the
        // struct, which is the layout of `[C; 2]`. The struct's alignment is at
        // least that of `C`, and the returned borrow is tied to `&self`.
        unsafe { &*pair }
    }
}
/// Kind of allocator operation that gets tallied.
///
/// Each discriminant doubles as the index of that operation's slot in
/// `AllocOpMap::values`, so the variant order must stay in sync with
/// [`AllocOp::ALL`].
#[derive(Clone, Copy, PartialEq, Eq)]
pub(crate) enum AllocOp {
    /// Reallocation to a size that is not smaller than the old one.
    Grow = 0,
    /// Reallocation to a smaller size.
    Shrink = 1,
    /// Fresh allocation.
    Alloc = 2,
    /// Deallocation.
    Dealloc = 3,
}

impl AllocOp {
    /// Every operation, ordered by discriminant.
    pub const ALL: [Self; 4] = [Self::Grow, Self::Shrink, Self::Alloc, Self::Dealloc];

    /// Picks the reallocation flavor: `Shrink` when `shrink` is `true`,
    /// otherwise `Grow`.
    #[inline]
    pub fn realloc(shrink: bool) -> Self {
        match shrink {
            true => Self::Shrink,
            false => Self::Grow,
        }
    }

    /// Lowercase name of the operation.
    #[inline]
    pub fn name(self) -> &'static str {
        match self {
            Self::Alloc => "alloc",
            Self::Dealloc => "dealloc",
            Self::Grow => "grow",
            Self::Shrink => "shrink",
        }
    }

    /// The operation's name followed by a colon.
    #[inline]
    pub fn prefix(self) -> &'static str {
        match self {
            Self::Alloc => "alloc:",
            Self::Dealloc => "dealloc:",
            Self::Grow => "grow:",
            Self::Shrink => "shrink:",
        }
    }
}
/// Fixed-size map holding one `T` per [`AllocOp`] variant.
#[derive(Clone, Copy, Default, PartialEq, Eq)]
pub(crate) struct AllocOpMap<T> {
/// Slots indexed by `op as usize` (see `get` and `get_mut`).
pub values: [T; 4],
}
/// Per-operation tallies as accumulated on a single thread.
pub(crate) type ThreadAllocTallyMap = AllocOpMap<ThreadAllocTally>;
/// Per-operation tallies using the wider `u128` totals.
pub(crate) type TotalAllocTallyMap = AllocOpMap<TotalAllocTally>;
impl<T: fmt::Debug> fmt::Debug for AllocOpMap<T> {
    /// Formats the map as `{ "<op name>": <value>, .. }`, in [`AllocOp::ALL`]
    /// order.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut map = f.debug_map();
        for op in AllocOp::ALL {
            map.entry(&op.name(), self.get(op));
        }
        map.finish()
    }
}
impl ThreadAllocTallyMap {
#[inline]
pub const fn new() -> Self {
unsafe { std::mem::transmute([0u8; std::mem::size_of::<Self>()]) }
}
#[inline]
pub fn is_empty(&self) -> bool {
self.values.iter().all(|tally| tally.count == 0 && tally.size == 0)
}
pub fn add_to_total(&self, total: &mut TotalAllocTallyMap) {
for (i, value) in self.values.iter().enumerate() {
total.values[i].count += value.count as u128;
total.values[i].size += value.size as u128;
}
}
}
impl<T> AllocOpMap<T> {
    /// Builds a map by calling `f` once per operation, in [`AllocOp::ALL`]
    /// order.
    #[cfg(test)]
    pub fn from_fn<F>(f: F) -> Self
    where
        F: FnMut(AllocOp) -> T,
    {
        let values = AllocOp::ALL.map(f);
        Self { values }
    }

    /// Returns the value stored for `op`.
    #[inline]
    pub const fn get(&self, op: AllocOp) -> &T {
        // The discriminant is the slot index.
        let slot = op as usize;
        &self.values[slot]
    }

    /// Returns the value stored for `op`, mutably.
    #[inline]
    pub fn get_mut(&mut self, op: AllocOp) -> &mut T {
        // The discriminant is the slot index.
        let slot = op as usize;
        &mut self.values[slot]
    }
}
// Internal benchmarks for the tallying hot path, compiled only with the
// `internal_benches` feature.
#[cfg(feature = "internal_benches")]
mod benches {
use super::*;
// Thread counts passed to the `threads` option of every benchmark below.
const THREADS: &[usize] = &[0, 1, 2, 4, 16];
// Measures looking up the current thread's info and calling `tally_n` on it.
#[crate::bench(crate = crate, threads = THREADS)]
fn tally_alloc(bencher: crate::Bencher) {
// Both inputs are zero, routed through `black_box` so they are opaque to the
// optimizer; the tallies themselves therefore never change.
let count = crate::black_box(0);
let size = crate::black_box(0);
bencher.bench(|| {
if let Some(mut info) = ThreadAllocInfo::try_current() {
let info = unsafe { info.as_mut() };
info.tally_n(AllocOp::Alloc, count, size)
}
})
}
// Measures the two lookup entry points on their own.
#[crate::bench_group(crate = crate, threads = THREADS)]
mod current {
use super::*;
// `ThreadAllocInfo::current`, which may create the info on macOS.
#[crate::bench(crate = crate)]
fn init() -> Option<NonNull<ThreadAllocInfo>> {
ThreadAllocInfo::current()
}
// `ThreadAllocInfo::try_current`, which never creates the info.
#[crate::bench(crate = crate)]
fn r#try() -> Option<NonNull<ThreadAllocInfo>> {
ThreadAllocInfo::try_current()
}
}
}
#[cfg(test)]
mod tests {
use super::*;
// Drives a `Vec<i32>` through alloc, grow, shrink and dealloc, and checks that
// each step is tallied under the matching `AllocOp`. It relies on the
// test-only `ALLOC` global allocator defined in this file.
#[test]
fn tally() {
let mut alloc_info = ThreadAllocInfo::current().unwrap();
// Returns the tallies recorded so far and resets them to the default.
let mut take_alloc_tallies = || std::mem::take(unsafe { &mut alloc_info.as_mut().tallies });
// Discard whatever was tallied on this thread before the test body.
_ = take_alloc_tallies();
// Every step below is expected to tally one operation of one `i32`'s size.
let item_tally = ThreadAllocTally { count: 1, size: std::mem::size_of::<i32>() as _ };
// Expected map for a single step: `item_tally` under `op`, defaults elsewhere.
let make_tally_map = |op: AllocOp| {
ThreadAllocTallyMap::from_fn(|other_op| {
if other_op == op {
item_tally
} else {
Default::default()
}
})
};
// An empty `Vec` must not have touched the allocator.
let mut buf: Vec<i32> = Vec::new();
assert_eq!(take_alloc_tallies(), Default::default());
// First reservation: expected to tally one `Alloc` of one element.
buf.reserve_exact(1);
assert_eq!(take_alloc_tallies(), make_tally_map(AllocOp::Alloc));
// Capacity 1 -> 2: expected to tally a `Grow` by one element's size.
buf.reserve_exact(2);
assert_eq!(take_alloc_tallies(), make_tally_map(AllocOp::Grow));
// Capacity 2 -> 1: expected to tally a `Shrink` by one element's size.
buf.shrink_to(1);
assert_eq!(take_alloc_tallies(), make_tally_map(AllocOp::Shrink));
// Dropping the one-element buffer: expected to tally a `Dealloc`.
drop(buf);
assert_eq!(take_alloc_tallies(), make_tally_map(AllocOp::Dealloc));
// Same sequence again without draining in between: every operation should
// then hold exactly one `item_tally`.
let mut buf: Vec<i32> = Vec::new();
buf.reserve_exact(1); buf.reserve_exact(2); buf.shrink_to(1); drop(buf); assert_eq!(take_alloc_tallies(), ThreadAllocTallyMap { values: [item_tally; 4] });
}
}