extern crate arrow;
#[macro_use]
extern crate criterion;
use criterion::Criterion;
use arrow::array::*;
use arrow_buffer::i256;
use rand::Rng;
use std::iter::repeat_n;
use std::{hint, sync::Arc};
/// Builds an `Int32Array` holding the values `0..n` (with `n` cast to `i32`)
/// and passes it through `black_box` so the construction is not optimized away.
fn array_from_vec(n: usize) {
    let values = (0..n as i32).collect::<Vec<i32>>();
    let array = Int32Array::from(values);
    hint::black_box(array);
}
/// Builds a `StringArray` of `n` slots in which even indices hold
/// `"hello world"` and odd indices are null, then passes it through
/// `black_box`.
fn array_string_from_vec(n: usize) {
    let values: Vec<Option<&str>> = (0..n)
        .map(|index| {
            if index % 2 == 0 {
                Some("hello world")
            } else {
                None
            }
        })
        .collect();
    hint::black_box(StringArray::from(values));
}
/// Produces the field names and column values used by the struct-array
/// benchmarks.
///
/// Each column repeats a fixed four-element pattern `n / 4` times, so the
/// returned vectors hold `(n / 4) * 4` entries (fewer than `n` when `n` is not
/// a multiple of four).
///
/// Returns `("f1", strings, "f2", ints)`.
fn struct_array_values(
    n: usize,
) -> (
    &'static str,
    Vec<Option<&'static str>>,
    &'static str,
    Vec<Option<i32>>,
) {
    const STRING_PATTERN: [Option<&'static str>; 4] = [Some("joe"), None, None, Some("mark")];
    const INT_PATTERN: [Option<i32>; 4] = [Some(1), Some(2), None, Some(4)];

    // Only whole repetitions of the pattern are emitted.
    let total = (n / 4) * 4;

    let strings: Vec<Option<&'static str>> = STRING_PATTERN
        .iter()
        .copied()
        .cycle()
        .take(total)
        .collect();
    let ints: Vec<Option<i32>> = INT_PATTERN.iter().copied().cycle().take(total).collect();

    ("f1", strings, "f2", ints)
}
/// Builds a two-column `StructArray` from copies of the given string and
/// integer slices and passes it through `black_box`.
///
/// `field1` names the string column and `field2` names the integer column.
/// Panics if `StructArray::try_from` returns an error.
fn struct_array_from_vec(
    field1: &str,
    strings: &[Option<&str>],
    field2: &str,
    ints: &[Option<i32>],
) {
    // The slices are copied on every call, so the copy is part of the measured work.
    let string_column: ArrayRef = Arc::new(StringArray::from(strings.to_vec()));
    let int_column: ArrayRef = Arc::new(Int32Array::from(ints.to_vec()));

    let columns = vec![(field1, string_column), (field2, int_column)];
    let array = StructArray::try_from(columns).unwrap();
    hint::black_box(array);
}
/// Collects the optional `i32` values into a `Decimal32Array`, applies
/// precision 9 and scale 2, and passes the result through `black_box`.
///
/// Panics if `with_precision_and_scale` returns an error.
fn decimal32_array_from_vec(array: &[Option<i32>]) {
    let collected: Decimal32Array = array.iter().copied().collect();
    let decimals = collected.with_precision_and_scale(9, 2).unwrap();
    hint::black_box(decimals);
}
/// Collects the optional `i64` values into a `Decimal64Array`, applies
/// precision 17 and scale 2, and passes the result through `black_box`.
///
/// Panics if `with_precision_and_scale` returns an error.
fn decimal64_array_from_vec(array: &[Option<i64>]) {
    let collected: Decimal64Array = array.iter().copied().collect();
    let decimals = collected.with_precision_and_scale(17, 2).unwrap();
    hint::black_box(decimals);
}
/// Collects the optional `i128` values into a `Decimal128Array`, applies
/// precision 34 and scale 2, and passes the result through `black_box`.
///
/// Panics if `with_precision_and_scale` returns an error.
fn decimal128_array_from_vec(array: &[Option<i128>]) {
    let collected: Decimal128Array = array.iter().copied().collect();
    let decimals = collected.with_precision_and_scale(34, 2).unwrap();
    hint::black_box(decimals);
}
/// Collects the optional `i256` values into a `Decimal256Array`, applies
/// precision 70 and scale 2, and passes the result through `black_box`.
///
/// Panics if `with_precision_and_scale` returns an error.
fn decimal256_array_from_vec(array: &[Option<i256>]) {
    let collected: Decimal256Array = array.iter().copied().collect();
    let decimals = collected.with_precision_and_scale(70, 2).unwrap();
    hint::black_box(decimals);
}
fn array_from_vec_decimal_benchmark(c: &mut Criterion) {
let size: usize = 1 << 15;
let mut rng = rand::rng();
let mut array = vec![];
for _ in 0..size {
array.push(Some(rng.random_range::<i32, _>(0..99999999)));
}
c.bench_function("decimal32_array_from_vec 32768", |b| {
b.iter(|| decimal32_array_from_vec(array.as_slice()))
});
let size: usize = 1 << 15;
let mut rng = rand::rng();
let mut array = vec![];
for _ in 0..size {
array.push(Some(rng.random_range::<i64, _>(0..9999999999)));
}
c.bench_function("decimal64_array_from_vec 32768", |b| {
b.iter(|| decimal64_array_from_vec(array.as_slice()))
});
let size: usize = 1 << 15;
let mut rng = rand::rng();
let mut array = vec![];
for _ in 0..size {
array.push(Some(rng.random_range::<i128, _>(0..9999999999)));
}
c.bench_function("decimal128_array_from_vec 32768", |b| {
b.iter(|| decimal128_array_from_vec(array.as_slice()))
});
let size = 1 << 10;
let mut array = vec![];
let mut rng = rand::rng();
for _ in 0..size {
let decimal = i256::from_i128(rng.random_range::<i128, _>(0..9999999999999));
array.push(Some(decimal));
}
c.bench_function("decimal256_array_from_vec 32768", |b| {
b.iter(|| decimal256_array_from_vec(array.as_slice()))
});
}
fn array_from_vec_benchmark(c: &mut Criterion) {
c.bench_function("array_from_vec 128", |b| b.iter(|| array_from_vec(128)));
c.bench_function("array_from_vec 256", |b| b.iter(|| array_from_vec(256)));
c.bench_function("array_from_vec 512", |b| b.iter(|| array_from_vec(512)));
c.bench_function("array_string_from_vec 128", |b| {
b.iter(|| array_string_from_vec(128))
});
c.bench_function("array_string_from_vec 256", |b| {
b.iter(|| array_string_from_vec(256))
});
c.bench_function("array_string_from_vec 512", |b| {
b.iter(|| array_string_from_vec(512))
});
let (field1, strings, field2, ints) = struct_array_values(128);
c.bench_function("struct_array_from_vec 128", |b| {
b.iter(|| struct_array_from_vec(field1, &strings, field2, &ints))
});
let (field1, strings, field2, ints) = struct_array_values(256);
c.bench_function("struct_array_from_vec 256", |b| {
b.iter(|| struct_array_from_vec(field1, &strings, field2, &ints))
});
let (field1, strings, field2, ints) = struct_array_values(512);
c.bench_function("struct_array_from_vec 512", |b| {
b.iter(|| struct_array_from_vec(field1, &strings, field2, &ints))
});
let (field1, strings, field2, ints) = struct_array_values(1024);
c.bench_function("struct_array_from_vec 1024", |b| {
b.iter(|| struct_array_from_vec(field1, &strings, field2, &ints))
});
}
/// Returns a vector of `len` entries where every index divisible by three is
/// `None` and every other index is `Some(item)`.
///
/// The vector is passed through `black_box` before being returned.
fn gen_option_vector<TItem: Copy>(item: TItem, len: usize) -> Vec<Option<TItem>> {
    let values: Vec<Option<TItem>> = (0..len)
        .map(|position| (position % 3 != 0).then_some(item))
        .collect();
    hint::black_box(values)
}
/// Benchmarks `from_iter` against `from_trusted_len_iter` for `Int64Array`
/// and `BooleanArray`, each over a 16,384-element vector of optional values
/// produced by `gen_option_vector`.
fn from_iter_benchmark(c: &mut Criterion) {
    const ITER_LEN: usize = 16_384;

    c.bench_function("Int64Array::from_iter", |b| {
        let input = gen_option_vector(1, ITER_LEN);
        b.iter(|| hint::black_box(Int64Array::from_iter(input.iter())));
    });

    c.bench_function("Int64Array::from_trusted_len_iter", |b| {
        let input = gen_option_vector(1, ITER_LEN);
        b.iter(|| {
            // SAFETY: the iterator comes from `Vec::iter`, whose reported
            // length is exact.
            let array = unsafe { Int64Array::from_trusted_len_iter(input.iter()) };
            hint::black_box(array)
        });
    });

    c.bench_function("BooleanArray::from_iter", |b| {
        let input = gen_option_vector(true, ITER_LEN);
        b.iter(|| hint::black_box(BooleanArray::from_iter(input.iter())));
    });

    c.bench_function("BooleanArray::from_trusted_len_iter", |b| {
        let input = gen_option_vector(true, ITER_LEN);
        b.iter(|| {
            // SAFETY: the iterator comes from `Vec::iter`, whose reported
            // length is exact.
            let array = unsafe { BooleanArray::from_trusted_len_iter(input.iter()) };
            hint::black_box(array)
        });
    });
}
// Collect the benchmark functions defined in this file into the `benches`
// group.
criterion_group!(
benches,
array_from_vec_benchmark,
array_from_vec_decimal_benchmark,
from_iter_benchmark
);
// Hand the `benches` group to Criterion's `criterion_main!` macro.
criterion_main!(benches);