use std::cmp::{self, Ordering};
use syntax;
use Error;
use backtrack::{Backtrack, BackMachine};
use char::Char;
use compile::Compiler;
use nfa::{Nfa, NfaThreads};
use pool::Pool;
use prefix::Prefix;
use re::CaptureIdxs;
const NUM_PREFIX_LIMIT: usize = 30;
const PREFIX_LENGTH_LIMIT: usize = 15;
pub type InstIdx = usize;
#[derive(Clone, Debug)]
pub enum Inst {
Match,
Save(usize),
Jump(InstIdx),
Split(InstIdx, InstIdx),
EmptyLook(LookInst),
Char(OneChar),
Ranges(CharRanges),
}
#[derive(Clone, Debug)]
pub struct OneChar {
pub c: char,
pub casei: bool,
}
#[derive(Clone, Debug)]
pub struct CharRanges {
pub ranges: Vec<(char, char)>,
pub casei: bool,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum LookInst {
StartLine,
EndLine,
StartText,
EndText,
WordBoundary,
NotWordBoundary,
}
impl OneChar {
#[inline(always)] pub fn matches(&self, c: Char) -> bool {
self.c == c || (self.casei && self.c == c.case_fold())
}
}
impl CharRanges {
pub fn any() -> CharRanges {
CharRanges {
ranges: vec![('\x00', '\u{10ffff}')],
casei: false,
}
}
pub fn any_nonl() -> CharRanges {
CharRanges {
ranges: vec![('\x00', '\x09'), ('\x0B', '\u{10ffff}')],
casei: false,
}
}
pub fn from_class(cls: syntax::CharClass) -> CharRanges {
let casei = cls.is_case_insensitive();
CharRanges {
ranges: cls.into_iter().map(|r| (r.start, r.end)).collect(),
casei: casei,
}
}
#[inline(always)] pub fn matches(&self, mut c: Char) -> Option<usize> {
if self.casei {
c = c.case_fold();
}
for i in 0..cmp::min(self.ranges.len(), 4) {
let r = self.ranges[i];
if c < r.0 {
return None;
}
if c <= r.1 {
return Some(i);
}
}
self.ranges.binary_search_by(|r| {
if r.1 < c {
Ordering::Less
} else if r.0 > c {
Ordering::Greater
} else {
Ordering::Equal
}
}).ok()
}
}
impl LookInst {
pub fn matches(&self, c1: Char, c2: Char) -> bool {
use self::LookInst::*;
match *self {
StartLine => c1.is_none() || c1 == '\n',
EndLine => c2.is_none() || c2 == '\n',
StartText => c1.is_none(),
EndText => c2.is_none(),
ref wbty => {
let (w1, w2) = (c1.is_word_char(), c2.is_word_char());
(*wbty == WordBoundary && w1 ^ w2)
|| (*wbty == NotWordBoundary && !(w1 ^ w2))
}
}
}
}
#[doc(hidden)]
#[derive(Clone, Copy, Debug)]
pub enum MatchEngine {
Backtrack,
Nfa,
Literals,
}
#[derive(Debug)]
pub struct Program {
pub original: String,
pub insts: Vec<Inst>,
pub cap_names: Vec<Option<String>>,
pub prefixes: Prefix,
pub prefixes_complete: bool,
pub anchored_begin: bool,
pub anchored_end: bool,
pub engine: Option<MatchEngine>,
pub nfa_threads: Pool<NfaThreads>,
pub backtrack: Pool<BackMachine>,
}
impl Program {
pub fn new(
engine: Option<MatchEngine>,
size_limit: usize,
re: &str,
) -> Result<Program, Error> {
let expr = try!(syntax::Expr::parse(re));
let (insts, cap_names) = try!(Compiler::new(size_limit).compile(expr));
let (insts_len, ncaps) = (insts.len(), num_captures(&insts));
let create_threads = move || NfaThreads::new(insts_len, ncaps);
let create_backtrack = move || BackMachine::new();
let mut prog = Program {
original: re.into(),
insts: insts,
cap_names: cap_names,
prefixes: Prefix::Empty,
prefixes_complete: false,
anchored_begin: false,
anchored_end: false,
engine: engine,
nfa_threads: Pool::new(Box::new(create_threads)),
backtrack: Pool::new(Box::new(create_backtrack)),
};
prog.find_prefixes();
prog.anchored_begin = match prog.insts[1] {
Inst::EmptyLook(LookInst::StartText) => true,
_ => false,
};
prog.anchored_end = match prog.insts[prog.insts.len() - 3] {
Inst::EmptyLook(LookInst::EndText) => true,
_ => false,
};
Ok(prog)
}
pub fn exec(
&self,
caps: &mut CaptureIdxs,
text: &str,
start: usize,
) -> bool {
match self.choose_engine(caps.len(), text) {
MatchEngine::Backtrack => Backtrack::exec(self, caps, text, start),
MatchEngine::Nfa => Nfa::exec(self, caps, text, start),
MatchEngine::Literals => {
match self.prefixes.find(&text[start..]) {
None => false,
Some((s, e)) => {
if caps.len() == 2 {
caps[0] = Some(start + s);
caps[1] = Some(start + e);
}
true
}
}
}
}
}
fn choose_engine(&self, cap_len: usize, text: &str) -> MatchEngine {
self.engine.unwrap_or_else(|| {
if cap_len <= 2
&& self.prefixes.preserves_priority()
&& self.prefixes_complete {
MatchEngine::Literals
} else if Backtrack::should_exec(self, text) {
MatchEngine::Backtrack
} else {
MatchEngine::Nfa
}
})
}
pub fn num_captures(&self) -> usize {
num_captures(&self.insts)
}
pub fn alloc_captures(&self) -> Vec<Option<usize>> {
vec![None; 2 * self.num_captures()]
}
pub fn find_prefixes(&mut self) {
use self::Inst::*;
let (ps, complete) = self.prefixes_from_insts(1);
if ps.len() > 0 {
self.prefixes = Prefix::new(ps);
self.prefixes_complete = complete;
return;
}
let mut pc = 1;
let mut prefixes = vec![];
let mut pcomplete = true;
while let Split(x, y) = self.insts[pc] {
let (xps, xcomplete) = self.prefixes_from_insts(x);
let (yps, ycomplete) = self.prefixes_from_insts(y);
let mut done = false;
match (&self.insts[x], &self.insts[y]) {
(&Split(_, _), &Split(_, _)) => return,
(_, &Split(_, _)) if xps.len() == 0 => return,
(_, &Split(_, _)) => {
pcomplete = pcomplete && xcomplete;
prefixes.extend(xps);
pc = y;
}
(&Split(_, _), _) if yps.len() == 0 => return,
(&Split(_, _), _) => {
pcomplete = pcomplete && ycomplete;
prefixes.extend(yps);
pc = x;
}
_ if xps.len() == 0 || yps.len() == 0 => return,
_ => {
pcomplete = pcomplete && xcomplete && ycomplete;
prefixes.extend(xps);
prefixes.extend(yps);
done = true;
}
}
if prefixes.len() > NUM_PREFIX_LIMIT {
return;
}
if done { break; }
}
self.prefixes = Prefix::new(prefixes);
self.prefixes_complete = pcomplete && self.prefixes.len() > 0;
}
fn prefixes_from_insts(&self, mut pc: usize) -> (Vec<String>, bool) {
use self::Inst::*;
let mut complete = true;
let mut alts = vec![String::new()];
while pc < self.insts.len() {
let inst = &self.insts[pc];
if alts[0].len() > PREFIX_LENGTH_LIMIT {
complete = false;
break;
}
match *inst {
Save(_) => { pc += 1; continue } Char(OneChar { c, casei: false }) => {
for alt in &mut alts {
alt.push(c);
}
pc += 1;
}
Ranges(CharRanges { ref ranges, casei: false }) => {
let nchars = num_chars_in_ranges(ranges);
if alts.len() * nchars > NUM_PREFIX_LIMIT {
complete = false;
break;
}
let orig = alts;
alts = Vec::with_capacity(orig.len());
for &(s, e) in ranges {
for c in (s as u32)..(e as u32 + 1){
for alt in &orig {
let mut alt = alt.clone();
alt.push(::std::char::from_u32(c).unwrap());
alts.push(alt);
}
}
}
pc += 1;
}
Jump(pc2) => pc = pc2,
_ => { complete = self.leads_to_match(pc); break }
}
}
if alts[0].len() == 0 {
(vec![], false)
} else {
(alts, complete)
}
}
fn leads_to_match(&self, mut pc: usize) -> bool {
loop {
match self.insts[pc] {
Inst::Match => return true,
Inst::Save(_) => pc += 1,
Inst::Jump(pc2) => pc = pc2,
_ => return false,
}
}
}
}
impl Clone for Program {
fn clone(&self) -> Program {
let (insts_len, ncaps) = (self.insts.len(), self.num_captures());
let create_threads = move || NfaThreads::new(insts_len, ncaps);
let create_backtrack = move || BackMachine::new();
Program {
original: self.original.clone(),
insts: self.insts.clone(),
cap_names: self.cap_names.clone(),
prefixes: self.prefixes.clone(),
prefixes_complete: self.prefixes_complete,
anchored_begin: self.anchored_begin,
anchored_end: self.anchored_end,
engine: self.engine,
nfa_threads: Pool::new(Box::new(create_threads)),
backtrack: Pool::new(Box::new(create_backtrack)),
}
}
}
fn num_captures(insts: &[Inst]) -> usize {
let mut n = 0;
for inst in insts {
match *inst {
Inst::Save(c) => n = cmp::max(n, c+1),
_ => {}
}
}
n / 2
}
fn num_chars_in_ranges(ranges: &[(char, char)]) -> usize {
ranges.iter()
.map(|&(s, e)| (e as u32) - (s as u32))
.fold(0, |acc, len| acc + len) as usize
}