use std::collections::HashMap;
use std::io;
use std::usize;
use {AhoCorasickBuilder, Match, MatchKind};
#[derive(Clone, Debug, Eq, PartialEq)]
struct SearchTest {
name: &'static str,
patterns: &'static [&'static str],
haystack: &'static str,
matches: &'static [(usize, usize, usize)],
}
macro_rules! t {
($name:ident, $patterns:expr, $haystack:expr, $matches:expr) => {
SearchTest {
name: stringify!($name),
patterns: $patterns,
haystack: $haystack,
matches: $matches,
}
}
}
type TestCollection = &'static [&'static [SearchTest]];
const AC_STANDARD_NON_OVERLAPPING: TestCollection = &[
BASICS, NON_OVERLAPPING, STANDARD, REGRESSION,
];
const AC_STANDARD_OVERLAPPING: TestCollection = &[
BASICS, OVERLAPPING, REGRESSION,
];
const AC_LEFTMOST_FIRST: TestCollection = &[
BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_FIRST, REGRESSION,
];
const AC_LEFTMOST_LONGEST: TestCollection = &[
BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_LONGEST, REGRESSION,
];
const BASICS: &'static [SearchTest] = &[
t!(basic000, &["a"], "", &[]),
t!(basic010, &["a"], "a", &[(0, 0, 1)]),
t!(basic020, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]),
t!(basic030, &["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]),
t!(basic040, &["a"], "aba", &[(0, 0, 1), (0, 2, 3)]),
t!(basic050, &["a"], "bba", &[(0, 2, 3)]),
t!(basic060, &["a"], "bbb", &[]),
t!(basic070, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]),
t!(basic100, &["aa"], "", &[]),
t!(basic110, &["aa"], "aa", &[(0, 0, 2)]),
t!(basic120, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]),
t!(basic130, &["aa"], "abbab", &[]),
t!(basic140, &["aa"], "abbabaa", &[(0, 5, 7)]),
t!(basic200, &["abc"], "abc", &[(0, 0, 3)]),
t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]),
t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]),
t!(basic300, &["a", "b"], "", &[]),
t!(basic310, &["a", "b"], "z", &[]),
t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]),
t!(basic330, &["a", "b"], "a", &[(0, 0, 1)]),
t!(basic340, &["a", "b"], "abba", &[
(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),
]),
t!(basic350, &["b", "a"], "abba", &[
(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),
]),
t!(nover360, &["abc", "bc"], "xbc", &[
(1, 1, 3),
]),
t!(basic400, &["foo", "bar"], "", &[]),
t!(basic410, &["foo", "bar"], "foobar", &[
(0, 0, 3), (1, 3, 6),
]),
t!(basic420, &["foo", "bar"], "barfoo", &[
(1, 0, 3), (0, 3, 6),
]),
t!(basic430, &["foo", "bar"], "foofoo", &[
(0, 0, 3), (0, 3, 6),
]),
t!(basic440, &["foo", "bar"], "barbar", &[
(1, 0, 3), (1, 3, 6),
]),
t!(basic450, &["foo", "bar"], "bafofoo", &[
(0, 4, 7),
]),
t!(basic460, &["bar", "foo"], "bafofoo", &[
(1, 4, 7),
]),
t!(basic470, &["foo", "bar"], "fobabar", &[
(1, 4, 7),
]),
t!(basic480, &["bar", "foo"], "fobabar", &[
(0, 4, 7),
]),
t!(basic600, &[""], "", &[(0, 0, 0)]),
t!(basic610, &[""], "a", &[(0, 0, 0), (0, 1, 1)]),
t!(basic620, &[""], "abc", &[(0, 0, 0), (0, 1, 1), (0, 2, 2), (0, 3, 3)]),
t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[
(0, 0, 7),
]),
t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[
(1, 1, 10),
]),
t!(basic720, &["yabcdef", "bcdeyabc", "abcdezghi"], "yabcdezghi", &[
(2, 1, 10),
]),
];
const STANDARD: &'static [SearchTest] = &[
t!(standard000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
t!(standard010, &["abcd", "ab"], "abcd", &[(1, 0, 2)]),
t!(standard020, &["abcd", "ab", "abc"], "abcd", &[(1, 0, 2)]),
t!(standard030, &["abcd", "abc", "ab"], "abcd", &[(2, 0, 2)]),
t!(standard040, &["a", ""], "a", &[(1, 0, 0), (1, 1, 1)]),
t!(standard400, &["abcd", "bcd", "cd", "b"], "abcd", &[
(3, 1, 2), (2, 2, 4),
]),
t!(standard410, &["", "a"], "a", &[
(0, 0, 0), (0, 1, 1),
]),
t!(standard420, &["", "a"], "aa", &[
(0, 0, 0), (0, 1, 1), (0, 2, 2),
]),
t!(standard430, &["", "a", ""], "a", &[
(0, 0, 0), (0, 1, 1),
]),
t!(standard440, &["a", "", ""], "a", &[
(1, 0, 0), (1, 1, 1),
]),
t!(standard450, &["", "", "a"], "a", &[
(0, 0, 0), (0, 1, 1),
]),
];
const LEFTMOST: &'static [SearchTest] = &[
t!(leftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
t!(leftmost010, &["a", ""], "a", &[(0, 0, 1), (1, 1, 1)]),
t!(leftmost020, &["", ""], "a", &[(0, 0, 0), (0, 1, 1)]),
t!(leftmost300, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]),
t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]),
t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]),
t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]),
t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
t!(leftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
t!(leftmost360, &["abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[
(2, 0, 8),
]),
t!(leftmost370, &["abcdefghi", "cde", "hz", "abcdefgh"], "abcdefghz", &[
(3, 0, 8),
]),
t!(leftmost380, &["abcdefghi", "hz", "abcdefgh", "a"], "abcdefghz", &[
(2, 0, 8),
]),
t!(leftmost390, &["b", "abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[
(3, 0, 8),
]),
t!(leftmost400, &["h", "abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[
(3, 0, 8),
]),
t!(leftmost410, &["z", "abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[
(3, 0, 8), (0, 8, 9),
]),
];
const LEFTMOST_FIRST: &'static [SearchTest] = &[
t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
t!(leftfirst010, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
t!(leftfirst011, &["", "a", ""], "a", &[
(0, 0, 0), (0, 1, 1),
]),
t!(leftfirst012, &["a", "", ""], "a", &[
(0, 0, 1), (1, 1, 1),
]),
t!(leftfirst013, &["", "", "a"], "a", &[
(0, 0, 0), (0, 1, 1),
]),
t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]),
t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]),
t!(leftfirst310, &["abcd", "b", "bce", "ce"], "abce", &[
(1, 1, 2), (3, 2, 4),
]),
t!(leftfirst320, &["a", "abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[
(0, 0, 1), (2, 7, 9),
]),
t!(leftfirst330, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]),
];
const LEFTMOST_LONGEST: &'static [SearchTest] = &[
t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[
(0, 0, 4),
]),
t!(leftlong020, &["", "a"], "a", &[
(1, 0, 1), (0, 1, 1),
]),
t!(leftlong021, &["", "a", ""], "a", &[
(1, 0, 1), (0, 1, 1),
]),
t!(leftlong022, &["a", "", ""], "a", &[
(0, 0, 1), (1, 1, 1),
]),
t!(leftlong023, &["", "", "a"], "a", &[
(2, 0, 1), (0, 1, 1),
]),
t!(leftlong030, &["", "a"], "aa", &[
(1, 0, 1), (1, 1, 2), (0, 2, 2),
]),
t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]),
t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]),
t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]),
t!(leftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]),
t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]),
t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
t!(leftlong300, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]),
t!(leftlong310, &["a", "abcdefghi", "hz", "abcdefgh"], "abcdefghz", &[
(3, 0, 8),
]),
t!(leftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]),
t!(leftlong330, &["abcd", "b", "ce"], "abce", &[
(1, 1, 2), (2, 2, 4),
]),
];
const NON_OVERLAPPING: &'static [SearchTest] = &[
t!(nover010, &["abcd", "bcd", "cd"], "abcd", &[
(0, 0, 4),
]),
t!(nover020, &["bcd", "cd", "abcd"], "abcd", &[
(2, 0, 4),
]),
t!(nover030, &["abc", "bc"], "zazabcz", &[
(0, 3, 6),
]),
t!(nover100, &["ab", "ba"], "abababa", &[
(0, 0, 2), (0, 2, 4), (0, 4, 6),
]),
t!(nover200, &["foo", "foo"], "foobarfoo", &[
(0, 0, 3), (0, 6, 9),
]),
t!(nover300, &["", ""], "", &[
(0, 0, 0),
]),
t!(nover310, &["", ""], "a", &[
(0, 0, 0), (0, 1, 1),
]),
];
const OVERLAPPING: &'static [SearchTest] = &[
t!(over000, &["abcd", "bcd", "cd", "b"], "abcd", &[
(3, 1, 2), (0, 0, 4), (1, 1, 4), (2, 2, 4),
]),
t!(over010, &["bcd", "cd", "b", "abcd"], "abcd", &[
(2, 1, 2), (3, 0, 4), (0, 1, 4), (1, 2, 4),
]),
t!(over020, &["abcd", "bcd", "cd"], "abcd", &[
(0, 0, 4), (1, 1, 4), (2, 2, 4),
]),
t!(over030, &["bcd", "abcd", "cd"], "abcd", &[
(1, 0, 4), (0, 1, 4), (2, 2, 4),
]),
t!(over040, &["bcd", "cd", "abcd"], "abcd", &[
(2, 0, 4), (0, 1, 4), (1, 2, 4),
]),
t!(over050, &["abc", "bc"], "zazabcz", &[
(0, 3, 6), (1, 4, 6),
]),
t!(over100, &["ab", "ba"], "abababa", &[
(0, 0, 2), (1, 1, 3), (0, 2, 4), (1, 3, 5), (0, 4, 6), (1, 5, 7),
]),
t!(over200, &["foo", "foo"], "foobarfoo", &[
(0, 0, 3), (1, 0, 3), (0, 6, 9), (1, 6, 9),
]),
t!(over300, &["", ""], "", &[
(0, 0, 0), (1, 0, 0),
]),
t!(over310, &["", ""], "a", &[
(0, 0, 0), (1, 0, 0), (0, 1, 1), (1, 1, 1),
]),
t!(over320, &["", "a"], "a", &[
(0, 0, 0), (1, 0, 1), (0, 1, 1),
]),
t!(over330, &["", "a", ""], "a", &[
(0, 0, 0), (2, 0, 0), (1, 0, 1), (0, 1, 1), (2, 1, 1),
]),
t!(over340, &["a", "", ""], "a", &[
(1, 0, 0), (2, 0, 0), (0, 0, 1), (1, 1, 1), (2, 1, 1),
]),
t!(over350, &["", "", "a"], "a", &[
(0, 0, 0), (1, 0, 0), (2, 0, 1), (0, 1, 1), (1, 1, 1),
]),
];
const REGRESSION: &'static [SearchTest] = &[
t!(regression010, &["inf", "ind"], "infind", &[
(0, 0, 3), (1, 3, 6),
]),
t!(regression020, &["ind", "inf"], "infind", &[
(1, 0, 3), (0, 3, 6),
]),
t!(regression030, &["libcore/", "libstd/"], "libcore/char/methods.rs", &[
(0, 0, 8),
]),
t!(regression040, &["libstd/", "libcore/"], "libcore/char/methods.rs", &[
(1, 0, 8),
]),
t!(regression050, &["\x00\x00\x01", "\x00\x00\x00"], "\x00\x00\x00", &[
(1, 0, 3),
]),
t!(regression060, &["\x00\x00\x00", "\x00\x00\x01"], "\x00\x00\x00", &[
(0, 0, 3),
]),
];
macro_rules! testconfig {
(overlapping, $name:ident, $collection:expr, $kind:ident, $with:expr) => {
#[test]
fn $name() {
run_search_tests($collection, |test| {
let mut builder = AhoCorasickBuilder::new();
$with(&mut builder);
builder
.match_kind(MatchKind::$kind)
.build(test.patterns)
.find_overlapping_iter(test.haystack)
.collect()
});
}
};
(stream, $name:ident, $collection:expr, $kind:ident, $with:expr) => {
#[test]
fn $name() {
run_search_tests($collection, |test| {
let buf = io::BufReader::with_capacity(
1,
test.haystack.as_bytes(),
);
let mut builder = AhoCorasickBuilder::new();
$with(&mut builder);
builder
.match_kind(MatchKind::$kind)
.build(test.patterns)
.stream_find_iter(buf)
.map(|result| result.unwrap())
.collect()
});
}
};
($name:ident, $collection:expr, $kind:ident, $with:expr) => {
#[test]
fn $name() {
run_search_tests($collection, |test| {
let mut builder = AhoCorasickBuilder::new();
$with(&mut builder);
builder
.match_kind(MatchKind::$kind)
.build(test.patterns)
.find_iter(test.haystack)
.collect()
});
}
};
}
macro_rules! testcombo {
($name:ident, $collection:expr, $kind:ident) => {
mod $name {
use super::*;
testconfig!(nfa_default, $collection, $kind, |_| ());
testconfig!(nfa_no_prefilter, $collection, $kind,
|b: &mut AhoCorasickBuilder| {
b.prefilter(false);
});
testconfig!(nfa_all_sparse, $collection, $kind,
|b: &mut AhoCorasickBuilder| {
b.dense_depth(0);
});
testconfig!(nfa_all_dense, $collection, $kind,
|b: &mut AhoCorasickBuilder| {
b.dense_depth(usize::MAX);
});
testconfig!(dfa_default, $collection, $kind,
|b: &mut AhoCorasickBuilder| {
b.dfa(true);
});
testconfig!(dfa_no_prefilter, $collection, $kind,
|b: &mut AhoCorasickBuilder| {
b.dfa(true).prefilter(false);
});
testconfig!(dfa_all_sparse, $collection, $kind,
|b: &mut AhoCorasickBuilder| {
b.dfa(true).dense_depth(0);
});
testconfig!(dfa_all_dense, $collection, $kind,
|b: &mut AhoCorasickBuilder| {
b.dfa(true).dense_depth(usize::MAX);
});
testconfig!(dfa_no_byte_class, $collection, $kind,
|b: &mut AhoCorasickBuilder| {
b.dfa(true).byte_classes(false);
});
testconfig!(dfa_no_premultiply, $collection, $kind,
|b: &mut AhoCorasickBuilder| {
b.dfa(true).premultiply(false);
});
testconfig!(dfa_no_byte_class_no_premultiply, $collection, $kind,
|b: &mut AhoCorasickBuilder| {
b.dfa(true).byte_classes(false).premultiply(false);
});
}
};
}
testcombo!(search_leftmost_longest, AC_LEFTMOST_LONGEST, LeftmostLongest);
testcombo!(search_leftmost_first, AC_LEFTMOST_FIRST, LeftmostFirst);
testcombo!(
search_standard_nonoverlapping, AC_STANDARD_NON_OVERLAPPING, Standard
);
testconfig!(
overlapping,
search_standard_overlapping_nfa_default,
AC_STANDARD_OVERLAPPING,
Standard,
|_| ()
);
testconfig!(
overlapping,
search_standard_overlapping_nfa_all_sparse,
AC_STANDARD_OVERLAPPING,
Standard,
|b: &mut AhoCorasickBuilder| { b.dense_depth(0); }
);
testconfig!(
overlapping,
search_standard_overlapping_nfa_all_dense,
AC_STANDARD_OVERLAPPING,
Standard,
|b: &mut AhoCorasickBuilder| { b.dense_depth(usize::MAX); }
);
testconfig!(
overlapping,
search_standard_overlapping_dfa_default,
AC_STANDARD_OVERLAPPING,
Standard,
|b: &mut AhoCorasickBuilder| { b.dfa(true); }
);
testconfig!(
overlapping,
search_standard_overlapping_dfa_all_sparse,
AC_STANDARD_OVERLAPPING,
Standard,
|b: &mut AhoCorasickBuilder| { b.dfa(true).dense_depth(0); }
);
testconfig!(
overlapping,
search_standard_overlapping_dfa_all_dense,
AC_STANDARD_OVERLAPPING,
Standard,
|b: &mut AhoCorasickBuilder| { b.dfa(true).dense_depth(usize::MAX); }
);
testconfig!(
overlapping,
search_standard_overlapping_dfa_no_byte_class,
AC_STANDARD_OVERLAPPING,
Standard,
|b: &mut AhoCorasickBuilder| { b.dfa(true).byte_classes(false); }
);
testconfig!(
overlapping,
search_standard_overlapping_dfa_no_premultiply,
AC_STANDARD_OVERLAPPING,
Standard,
|b: &mut AhoCorasickBuilder| { b.dfa(true).premultiply(false); }
);
testconfig!(
overlapping,
search_standard_overlapping_dfa_no_byte_class_no_premultiply,
AC_STANDARD_OVERLAPPING,
Standard,
|b: &mut AhoCorasickBuilder| {
b.dfa(true).byte_classes(false).premultiply(false);
}
);
testconfig!(
stream,
search_standard_stream_nfa_default,
AC_STANDARD_NON_OVERLAPPING,
Standard,
|_| ()
);
testconfig!(
stream,
search_standard_stream_dfa_default,
AC_STANDARD_NON_OVERLAPPING,
Standard,
|b: &mut AhoCorasickBuilder| { b.dfa(true); }
);
#[test]
fn search_tests_have_unique_names() {
let assert = |constname, tests: &[SearchTest]| {
let mut seen = HashMap::new(); for (i, test) in tests.iter().enumerate() {
if !seen.contains_key(test.name) {
seen.insert(test.name, i);
} else {
let last = seen[test.name];
panic!(
"{} tests have duplicate names at positions {} and {}",
constname, last, i
);
}
}
};
assert("BASICS", BASICS);
assert("STANDARD", STANDARD);
assert("LEFTMOST", LEFTMOST);
assert("LEFTMOST_FIRST", LEFTMOST_FIRST);
assert("LEFTMOST_LONGEST", LEFTMOST_LONGEST);
assert("NON_OVERLAPPING", NON_OVERLAPPING);
assert("OVERLAPPING", OVERLAPPING);
assert("REGRESSION", REGRESSION);
}
#[test]
#[should_panic]
fn stream_not_allowed_leftmost_first() {
let fsm = AhoCorasickBuilder::new()
.match_kind(MatchKind::LeftmostFirst)
.build(None::<String>);
assert_eq!(fsm.stream_find_iter(&b""[..]).count(), 0);
}
#[test]
#[should_panic]
fn stream_not_allowed_leftmost_longest() {
let fsm = AhoCorasickBuilder::new()
.match_kind(MatchKind::LeftmostLongest)
.build(None::<String>);
assert_eq!(fsm.stream_find_iter(&b""[..]).count(), 0);
}
#[test]
#[should_panic]
fn overlapping_not_allowed_leftmost_first() {
let fsm = AhoCorasickBuilder::new()
.match_kind(MatchKind::LeftmostFirst)
.build(None::<String>);
assert_eq!(fsm.find_overlapping_iter("").count(), 0);
}
#[test]
#[should_panic]
fn overlapping_not_allowed_leftmost_longest() {
let fsm = AhoCorasickBuilder::new()
.match_kind(MatchKind::LeftmostLongest)
.build(None::<String>);
assert_eq!(fsm.find_overlapping_iter("").count(), 0);
}
#[test]
fn state_id_too_small() {
let mut patterns = vec![];
for c1 in (b'a'..b'z').map(|b| b as char) {
for c2 in (b'a'..b'z').map(|b| b as char) {
for c3 in (b'a'..b'z').map(|b| b as char) {
patterns.push(format!("{}{}{}", c1, c2, c3));
}
}
}
let result = AhoCorasickBuilder::new()
.build_with_size::<u8, _, _>(&patterns);
assert!(result.is_err());
}
fn run_search_tests<F: FnMut(&SearchTest) -> Vec<Match>>(
which: TestCollection,
mut f: F,
) {
let get_match_triples =
|matches: Vec<Match>| -> Vec<(usize, usize, usize)> {
matches.into_iter()
.map(|m| (m.pattern(), m.start(), m.end()))
.collect()
};
for &tests in which {
for test in tests {
assert_eq!(
test.matches,
get_match_triples(f(&test)).as_slice(),
"test: {}, patterns: {:?}, haystack: {:?}",
test.name,
test.patterns,
test.haystack
);
}
}
}