use std::{hash::{Hash, Hasher, BuildHasher}, collections::HashSet, slice};
use rand::Rng;
use criterion::black_box;
fn main() {
bench_hasher_quality::<gxhash::GxBuildHasher>("GxHash");
bench_hasher_quality::<ahash::RandomState>("AHash");
bench_hasher_quality::<t1ha::T1haBuildHasher>("T1ha");
bench_hasher_quality::<twox_hash::xxh3::RandomHashBuilder64>("XxHash3");
bench_hasher_quality::<std::collections::hash_map::RandomState>("Default");
bench_hasher_quality::<fnv::FnvBuildHasher>("FNV-1a");
}
macro_rules! check {
($func:expr) => {
let score = $func;
let name = stringify!($func).replace('\n', "").replace(" ", "");
if score == 0.0 {
println!(" ✅ {}", name);
} else {
println!(" ❌ {}", name);
println!(" | Score: {}. Expected is 0.", score);
};
};
}
fn bench_hasher_quality<B>(name: &str)
where B : BuildHasher + Default
{
println!("Bench {}", name);
check!(avalanche::<B, 4>());
check!(avalanche::<B, 10>());
check!(avalanche::<B, 32>());
check!(avalanche::<B, 128>());
check!(avalanche::<B, 512>());
check!(distribution_values::<B, 4>(128 * 128));
check!(distribution_values::<B, 16>(128 * 128));
check!(distribution_values::<B, 128>(128 * 128));
check!(distribution_values::<B, 512>(128 * 128));
check!(distribution_bits::<B, 4>());
check!(distribution_bits::<B, 16>());
check!(distribution_bits::<B, 128>());
check!(distribution_bits::<B, 512>());
check!(collisions_padded_zeroes::<B>(128 * 128));
check!(collisions_flipped_bits::<B, 2>(9));
check!(collisions_flipped_bits::<B, 3>(9));
check!(collisions_flipped_bits::<B, 4>(7));
check!(collisions_flipped_bits::<B, 5>(6));
check!(collisions_flipped_bits::<B, 6>(5));
check!(collisions_flipped_bits::<B, 7>(5));
check!(collisions_flipped_bits::<B, 9>(4));
check!(collisions_flipped_bits::<B, 20>(4));
check!(collisions_flipped_bits::<B, 32>(3));
check!(collisions_flipped_bits::<B, 64>(3));
check!(collisions_flipped_bits::<B, 256>(2));
check!(collisions_permute::<B, u8>(4, &Vec::from_iter(0..16))); check!(collisions_permute::<B, u8>(42, &Vec::from_iter(0..64)));
check!(collisions_permute::<B, u16>(42, &Vec::from_iter(0..64)));
check!(collisions_permute::<B, u32>(42, &Vec::from_iter(0..64)));
check!(collisions_permute::<B, u64>(42, &Vec::from_iter(0..64)));
check!(collisions_permute::<B, u128>(4, &Vec::from_iter(0..16))); check!(collisions_permute::<B, u128>(42, &Vec::from_iter(0..64)));
check!(collisions_powerset_bytes::<B>(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]));
check!(collisions_powerset_bytes::<B>(&[0, 1, 2, 4, 8, 16, 32, 64, 128]));
check!(hasher_collisions_permute::<B, u8>(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]));
check!(hasher_collisions_permute::<B, u32>(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]));
check!(hasher_collisions_permute::<B, u32>(&[0, 1, 2, 4, 8, 16, 32, 64, 128, 256]));
check!(hasher_collisions_powerset::<B, u32>(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]));
check!(hasher_collisions_powerset::<B, u32>(&[0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384]));
}
fn hasher_collisions_permute<B, D>(data: &[impl Hash]) -> f64
where B : BuildHasher + Default
{
use itertools::Itertools;
let build_hasher = B::default();
let mut set = HashSet::new();
let mut i = 0;
for perm in data.iter().permutations(data.len()) {
let mut hasher = build_hasher.build_hasher();
perm.hash(&mut hasher);
set.insert(hasher.finish());
i += 1;
}
(i - set.len()) as f64 / i as f64
}
fn collisions_permute<B, D>(step: usize, data: &[D]) -> f64
where B : BuildHasher + Default,
D : Clone
{
let build_hasher = B::default();
let mut set = HashSet::new();
let mut i = 0;
let mut x = data.to_vec();
permute(&mut x, 0, step, &mut |d| {
let len = data.len() * std::mem::size_of::<D>();
let perm_u8 = unsafe {
slice::from_raw_parts(d.as_ptr() as *const u8, len)
};
let mut hasher = build_hasher.build_hasher();
hasher.write(&perm_u8);
set.insert(hasher.finish());
i += 1;
});
(i - set.len()) as f64 / i as f64
}
fn permute<T, F>(arr: &mut [T], start: usize, step: usize, f: &mut F)
where F: FnMut(&[T])
{
if start >= arr.len() - 1 {
f(arr);
} else {
for i in (start..arr.len()).step_by(step) {
arr.swap(start, i);
permute(arr, start + 1, step, f);
arr.swap(start, i);
}
}
}
fn hasher_collisions_powerset<B, D>(data: &[impl Hash]) -> f64
where B : BuildHasher + Default
{
use itertools::Itertools;
let build_hasher = B::default();
let mut set = HashSet::new();
let mut i = 0;
for perm in data.iter().powerset() {
let mut hasher = build_hasher.build_hasher();
perm.hash(&mut hasher);
set.insert(hasher.finish());
i += 1;
}
(i - set.len()) as f64 / i as f64
}
fn collisions_powerset_bytes<B>(data: &[u8]) -> f64
where B : BuildHasher + Default
{
use itertools::Itertools;
let build_hasher = B::default();
let mut set = HashSet::new();
let mut i = 0;
for perm in data.iter().powerset() {
let mut hasher = build_hasher.build_hasher();
let features: Vec<u8> = perm.iter().map(|f| **f).collect();
hasher.write(&features);
set.insert(hasher.finish());
i += 1;
}
(i - set.len()) as f64 / i as f64
}
fn collisions_padded_zeroes<B>(max_size: usize) -> f64
where B : BuildHasher + Default
{
let build_hasher = B::default();
let bytes = vec![0u8; max_size];
let mut set = HashSet::new();
let mut i = 0;
for _ in 0..max_size {
let slice = &bytes[0..i];
let mut hasher = build_hasher.build_hasher();
hasher.write(slice);
set.insert(hasher.finish());
i += 1;
}
(i - set.len()) as f64 / i as f64
}
fn collisions_flipped_bits<B, const N: usize>(bits_to_set: usize) -> f64
where B : BuildHasher + Default
{
let build_hasher = B::default();
let mut input = [0u8; N];
let mut hashes = Vec::new();
let mut hasher = build_hasher.build_hasher();
hasher.write(&input);
hashes.push(hasher.finish());
flip_n_bits_recurse::<B, N>(&build_hasher, 0, bits_to_set, &mut input, &mut hashes);
let hashes_count = hashes.len();
let set: HashSet<u64> = HashSet::from_iter(hashes);
(hashes_count - set.len()) as f64 / hashes_count as f64
}
fn flip_n_bits_recurse<B, const N: usize>(
build_hasher: &B, start: usize, bits_left: usize, input: &mut [u8], hashes: &mut Vec<u64>)
where B : BuildHasher + Default
{
let nbits: usize = N * 8;
for i in start..nbits {
let bit = 1 << (i % 8);
input[i / 8] ^= bit;
let mut hasher = build_hasher.build_hasher();
hasher.write(&input);
hashes.push(hasher.finish());
if bits_left > 1 {
flip_n_bits_recurse::<B, N>(build_hasher, i + 1, bits_left - 1, input, hashes);
}
let bit = 1 << (i % 8);
input[i / 8] ^= bit;
}
}
fn avalanche<B, const N: usize>() -> f64
where B : BuildHasher + Default
{
const AVALANCHE_ITERATIONS: usize = 1000;
const AVG_ITERATIONS: usize = 10;
let mut sum: f64 = 0f64;
for _ in 0..AVG_ITERATIONS {
sum += avalanche_iterations::<B, N>(AVALANCHE_ITERATIONS);
}
let score = sum / AVG_ITERATIONS as f64;
let rounded = round_to_decimal(score, (AVALANCHE_ITERATIONS as f64).log10() as usize);
rounded
}
fn avalanche_iterations<B, const N: usize>(iterations: usize) -> f64
where B : BuildHasher + Default
{
let build_hasher = B::default();
const SIZE_R: usize = std::mem::size_of::<u64>();
let mut scores_sum = 0f64;
let mut rng = rand::thread_rng();
let input: &mut [u8] = &mut [0u8; N];
for _ in 0..iterations {
rng.fill(input);
let mut hasher1 = build_hasher.build_hasher();
hasher1.write(&input);
let v1 = hasher1.finish();
let bytes_bit_changed = &mut input.to_vec().clone();
for i in 0..(N * 8) {
bytes_bit_changed[i / 8] = input[i / 8] ^ (1 << (i % 8));
let mut hasher2 = build_hasher.build_hasher();
hasher2.write(black_box(&bytes_bit_changed)); let v2 = black_box(hasher2.finish());
let diffs = (v1 ^ v2).count_ones();
let diff_ratio = diffs as f64 / (SIZE_R * 8) as f64;
scores_sum += diff_ratio;
bytes_bit_changed[i / 8] = input[i / 8];
}
}
let count = iterations * N * 8;
let score = (1.0 - 2.0 * (scores_sum / count as f64)).abs();
score
}
fn distribution_bits<B, const N: usize>() -> f64
where B : BuildHasher + Default
{
const DISTRIBUTION_ITERATIONS: usize = 10000;
const AVG_ITERATIONS: usize = 100;
let mut sum: f64 = 0f64;
for _ in 0..AVG_ITERATIONS {
sum += distribution_bits_iterations::<B, N>(DISTRIBUTION_ITERATIONS);
}
let score = sum / AVG_ITERATIONS as f64;
let rounded = round_to_decimal(score, (DISTRIBUTION_ITERATIONS as f64).log10() as usize);
rounded
}
fn distribution_bits_iterations<B, const N: usize>(iterations: usize) -> f64
where B : BuildHasher + Default
{
let build_hasher = B::default();
const SIZE_R: usize = std::mem::size_of::<u64>();
let mut bit_buckets = vec![0f64; SIZE_R * 8];
let mut rng = rand::thread_rng();
let input: &mut [u8] = &mut [0u8; N];
for _ in 0..iterations {
rng.fill(input);
let mut hasher = build_hasher.build_hasher();
hasher.write(&input);
let hash = hasher.finish();
let hash_bytes = hash.to_ne_bytes();
for b in 0..SIZE_R {
for k in 0..8 {
bit_buckets[8 * b + k] += ((hash_bytes[b] >> k) & 1) as f64;
}
}
}
bit_buckets = bit_buckets.iter().map(|x| x / iterations as f64).collect();
let std = variance_to_mean(&bit_buckets, 0.5);
let worst_variance = 0.25f64;
let score = std / worst_variance;
score
}
fn variance(data: &[f64]) -> f64 {
if data.is_empty() {
return 0.0;
}
let mean = data.iter().sum::<f64>() / data.len() as f64;
let variance = data.iter().map(|value| {
let diff = mean - value;
diff * diff
}).sum::<f64>() / data.len() as f64;
variance
}
fn variance_to_mean(data: &[f64], mean: f64) -> f64 {
if data.is_empty() {
return 0.0;
}
let variance = data.iter().map(|value| {
let diff = mean - value;
diff * diff
}).sum::<f64>() / data.len() as f64;
variance
}
fn distribution_values<B, const N: usize>(buckets_count: usize) -> f64
where B : BuildHasher + Default
{
const DISTRIBUTION_ITERATIONS: usize = 100000;
const AVG_ITERATIONS: usize = 100;
let mut sum: f64 = 0f64;
for _ in 0..AVG_ITERATIONS {
sum += distribution_values_iterations::<B, N>(DISTRIBUTION_ITERATIONS, buckets_count);
}
let score = sum / AVG_ITERATIONS as f64;
let rounded = round_to_decimal(score, (DISTRIBUTION_ITERATIONS as f64).log10() as usize);
rounded
}
fn distribution_values_iterations<B, const N: usize>(iterations: usize, buckets_count: usize) -> f64
where B : BuildHasher + Default
{
let build_hasher = B::default();
const MAX_U64: u64 = u64::MAX;
let mut buckets = vec![0f64; buckets_count];
let mut rng = rand::thread_rng();
let input: &mut [u8] = &mut [0u8; N];
for _ in 0..iterations {
rng.fill(input);
let mut hasher = build_hasher.build_hasher();
hasher.write(&input);
let hash = hasher.finish();
let hash_f = hash as f64;
let bucketed_f = hash_f / MAX_U64 as f64;
let index = (buckets_count as f64 * bucketed_f).floor() as usize;
buckets[index] += 1f64;
}
buckets = buckets.iter().map(|x| x / iterations as f64).collect();
let std = variance(&buckets);
let worst_variance = 1f64 / buckets_count as f64;
let score = std / worst_variance;
score
}
fn round_to_decimal(value: f64, decimals: usize) -> f64
{
let factor = 10f64.powi(decimals as i32 - 1);
(value * factor).round() / factor
}