#![feature(test)]
extern crate aho_corasick;
extern crate test;
use std::iter;
use aho_corasick::{Automaton, AcAutomaton, Transitions};
use test::Bencher;
const HAYSTACK_RANDOM: &'static str = include_str!("random.txt");
fn bench_aut_no_match<P: AsRef<[u8]>, T: Transitions>(
b: &mut Bencher,
aut: AcAutomaton<P, T>,
haystack: &str,
) {
b.bytes = haystack.len() as u64;
b.iter(|| assert!(aut.find(haystack).next().is_none()));
}
fn bench_full_aut_no_match<P: AsRef<[u8]>, T: Transitions>(
b: &mut Bencher,
aut: AcAutomaton<P, T>,
haystack: &str,
) {
let aut = aut.into_full();
b.bytes = haystack.len() as u64;
b.iter(|| assert!(aut.find(haystack).next().is_none()));
}
fn bench_naive_no_match<S>(b: &mut Bencher, needles: Vec<S>, haystack: &str)
where S: Into<String> {
b.bytes = haystack.len() as u64;
let needles: Vec<String> = needles.into_iter().map(Into::into).collect();
b.iter(|| assert!(!naive_find(&needles, haystack)));
}
fn haystack_same(letter: char) -> String {
iter::repeat(letter).take(10000).collect()
}
macro_rules! aut_benches {
($prefix:ident, $aut:expr, $bench:expr) => { mod $prefix {
#![allow(unused_imports)]
use aho_corasick::{Automaton, AcAutomaton, Sparse};
use test::Bencher;
use super::{
HAYSTACK_RANDOM, haystack_same,
bench_aut_no_match, bench_full_aut_no_match,
};
#[bench]
fn ac_one_byte(b: &mut Bencher) {
let aut = $aut(vec!["a"]);
$bench(b, aut, &haystack_same('z'));
}
#[bench]
fn ac_one_prefix_byte_no_match(b: &mut Bencher) {
let aut = $aut(vec!["zbc"]);
$bench(b, aut, &haystack_same('y'));
}
#[bench]
fn ac_one_prefix_byte_every_match(b: &mut Bencher) {
let aut = $aut(vec!["zbc"]);
$bench(b, aut, &haystack_same('z'));
}
#[bench]
fn ac_one_prefix_byte_random(b: &mut Bencher) {
let aut = $aut(vec!["zbc\x00"]);
$bench(b, aut, HAYSTACK_RANDOM);
}
#[bench]
fn ac_two_bytes(b: &mut Bencher) {
let aut = $aut(vec!["a", "b"]);
$bench(b, aut, &haystack_same('z'));
}
#[bench]
fn ac_two_diff_prefix(b: &mut Bencher) {
let aut = $aut(vec!["abcdef", "bmnopq"]);
$bench(b, aut, &haystack_same('z'));
}
#[bench]
fn ac_two_one_prefix_byte_every_match(b: &mut Bencher) {
let aut = $aut(vec!["zbcdef", "zmnopq"]);
$bench(b, aut, &haystack_same('z'));
}
#[bench]
fn ac_two_one_prefix_byte_no_match(b: &mut Bencher) {
let aut = $aut(vec!["zbcdef", "zmnopq"]);
$bench(b, aut, &haystack_same('y'));
}
#[bench]
fn ac_two_one_prefix_byte_random(b: &mut Bencher) {
let aut = $aut(vec!["zbcdef\x00", "zmnopq\x00"]);
$bench(b, aut, HAYSTACK_RANDOM);
}
#[bench]
fn ac_ten_bytes(b: &mut Bencher) {
let aut = $aut(vec!["a", "b", "c", "d", "e",
"f", "g", "h", "i", "j"]);
$bench(b, aut, &haystack_same('z'));
}
#[bench]
fn ac_ten_diff_prefix(b: &mut Bencher) {
let aut = $aut(vec!["abcdef", "bbcdef", "cbcdef", "dbcdef",
"ebcdef", "fbcdef", "gbcdef", "hbcdef",
"ibcdef", "jbcdef"]);
$bench(b, aut, &haystack_same('z'));
}
#[bench]
fn ac_ten_one_prefix_byte_every_match(b: &mut Bencher) {
let aut = $aut(vec!["zacdef", "zbcdef", "zccdef", "zdcdef",
"zecdef", "zfcdef", "zgcdef", "zhcdef",
"zicdef", "zjcdef"]);
$bench(b, aut, &haystack_same('z'));
}
#[bench]
fn ac_ten_one_prefix_byte_no_match(b: &mut Bencher) {
let aut = $aut(vec!["zacdef", "zbcdef", "zccdef", "zdcdef",
"zecdef", "zfcdef", "zgcdef", "zhcdef",
"zicdef", "zjcdef"]);
$bench(b, aut, &haystack_same('y'));
}
#[bench]
fn ac_ten_one_prefix_byte_random(b: &mut Bencher) {
let aut = $aut(vec!["zacdef\x00", "zbcdef\x00", "zccdef\x00",
"zdcdef\x00", "zecdef\x00", "zfcdef\x00",
"zgcdef\x00", "zhcdef\x00", "zicdef\x00",
"zjcdef\x00"]);
$bench(b, aut, HAYSTACK_RANDOM);
}
}}}
aut_benches!(dense, AcAutomaton::new, bench_aut_no_match);
aut_benches!(sparse, AcAutomaton::<&str, Sparse>::with_transitions,
bench_aut_no_match);
aut_benches!(full, AcAutomaton::new, bench_full_aut_no_match);
fn naive_find(needles: &[String], haystack: &str) -> bool {
for hi in 0..haystack.len() {
let rest = &haystack.as_bytes()[hi..];
for needle in needles {
let needle = needle.as_bytes();
if needle.len() > rest.len() {
continue;
}
if needle == &rest[..needle.len()] {
return true;
}
}
}
false
}
#[bench]
fn naive_one_byte(b: &mut Bencher) {
bench_naive_no_match(b, vec!["a"], &haystack_same('z'));
}
#[bench]
fn naive_one_prefix_byte_no_match(b: &mut Bencher) {
bench_naive_no_match(b, vec!["zbc"], &haystack_same('y'));
}
#[bench]
fn naive_one_prefix_byte_every_match(b: &mut Bencher) {
bench_naive_no_match(b, vec!["zbc"], &haystack_same('z'));
}
#[bench]
fn naive_one_prefix_byte_random(b: &mut Bencher) {
bench_naive_no_match(b, vec!["zbc\x00"], HAYSTACK_RANDOM);
}
#[bench]
fn naive_two_bytes(b: &mut Bencher) {
bench_naive_no_match(b, vec!["a", "b"], &haystack_same('z'));
}
#[bench]
fn naive_two_diff_prefix(b: &mut Bencher) {
bench_naive_no_match(b, vec!["abcdef", "bmnopq"], &haystack_same('z'));
}
#[bench]
fn naive_two_one_prefix_byte_every_match(b: &mut Bencher) {
bench_naive_no_match(b, vec!["zbcdef", "zmnopq"], &haystack_same('z'));
}
#[bench]
fn naive_two_one_prefix_byte_no_match(b: &mut Bencher) {
bench_naive_no_match(b, vec!["zbcdef", "zmnopq"], &haystack_same('y'));
}
#[bench]
fn naive_two_one_prefix_byte_random(b: &mut Bencher) {
bench_naive_no_match(b, vec!["zbcdef\x00", "zmnopq\x00"], HAYSTACK_RANDOM);
}
#[bench]
fn naive_ten_bytes(b: &mut Bencher) {
let needles = vec!["a", "b", "c", "d", "e",
"f", "g", "h", "i", "j"];
bench_naive_no_match(b, needles, &haystack_same('z'));
}
#[bench]
fn naive_ten_diff_prefix(b: &mut Bencher) {
let needles = vec!["abcdef", "bbcdef", "cbcdef", "dbcdef",
"ebcdef", "fbcdef", "gbcdef", "hbcdef",
"ibcdef", "jbcdef"];
bench_naive_no_match(b, needles, &haystack_same('z'));
}
#[bench]
fn naive_ten_one_prefix_byte_every_match(b: &mut Bencher) {
let needles = vec!["zacdef", "zbcdef", "zccdef", "zdcdef",
"zecdef", "zfcdef", "zgcdef", "zhcdef",
"zicdef", "zjcdef"];
bench_naive_no_match(b, needles, &haystack_same('z'));
}
#[bench]
fn naive_ten_one_prefix_byte_no_match(b: &mut Bencher) {
let needles = vec!["zacdef", "zbcdef", "zccdef", "zdcdef",
"zecdef", "zfcdef", "zgcdef", "zhcdef",
"zicdef", "zjcdef"];
bench_naive_no_match(b, needles, &haystack_same('y'));
}
#[bench]
fn naive_ten_one_prefix_byte_random(b: &mut Bencher) {
let needles = vec!["zacdef\x00", "zbcdef\x00", "zccdef\x00",
"zdcdef\x00", "zecdef\x00", "zfcdef\x00",
"zgcdef\x00", "zhcdef\x00", "zicdef\x00",
"zjcdef\x00"];
bench_naive_no_match(b, needles, HAYSTACK_RANDOM);
}