use self::Inst::*;
use std::cmp;
use syntax::{self, Expr, Repeater};
use Error;
pub type InstIdx = usize;
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub enum Inst {
Match,
OneChar { c: char, casei: bool },
CharClass(syntax::CharClass),
Any,
AnyNoNL,
StartLine,
EndLine,
StartText,
EndText,
WordBoundary,
NotWordBoundary,
Save(usize),
Jump(InstIdx),
Split(InstIdx, InstIdx),
}
#[derive(Clone, Debug)]
pub struct Program {
pub insts: Vec<Inst>,
pub prefix: String,
}
impl Program {
pub fn new(ast: Expr, size: usize) -> Result<(Program, Vec<Option<String>>), Error> {
let mut c = Compiler {
insts: Vec::with_capacity(100),
names: vec![None],
size_limit: size,
};
c.insts.push(Save(0));
try!(c.compile(ast));
c.insts.push(Save(1));
c.insts.push(Match);
let mut pre = String::with_capacity(5);
for inst in c.insts[1..].iter() {
match *inst {
OneChar { c, casei: false } => pre.push(c),
_ => break
}
}
let Compiler { insts, names, .. } = c;
let prog = Program {
insts: insts,
prefix: pre,
};
Ok((prog, names))
}
pub fn num_captures(&self) -> usize {
let mut n = 0;
for inst in self.insts.iter() {
match *inst {
Save(c) => n = cmp::max(n, c+1),
_ => {}
}
}
n / 2
}
}
struct Compiler {
insts: Vec<Inst>,
names: Vec<Option<String>>,
size_limit: usize,
}
impl Compiler {
fn check_size(&self) -> Result<(), Error> {
if self.insts.len() * ::std::mem::size_of::<Inst>() > self.size_limit {
Err(Error::CompiledTooBig(self.size_limit))
} else {
Ok(())
}
}
fn compile(&mut self, ast: Expr) -> Result<(), Error> {
match ast {
Expr::Empty => {},
Expr::Literal { chars, casei } => {
for c in chars {
self.push(OneChar { c: c, casei: casei });
}
}
Expr::AnyChar => self.push(Any),
Expr::AnyCharNoNL => self.push(AnyNoNL),
Expr::Class(cls) => self.push(CharClass(cls)),
Expr::StartLine => self.push(StartLine),
Expr::EndLine => self.push(EndLine),
Expr::StartText => self.push(StartText),
Expr::EndText => self.push(EndText),
Expr::WordBoundary => self.push(WordBoundary),
Expr::NotWordBoundary => self.push(NotWordBoundary),
Expr::Group { e, i: None, name: None } => try!(self.compile(*e)),
Expr::Group { e, i, name } => {
let i = i.expect("capture index");
self.names.push(name);
self.push(Save(2 * i));
try!(self.compile(*e));
self.push(Save(2 * i + 1));
}
Expr::Concat(es) => {
for e in es {
try!(self.compile(e));
}
}
Expr::Alternate(mut es) => {
if es.len() == 0 {
return Ok(());
}
let e1 = es.remove(0);
if es.len() == 0 {
try!(self.compile(e1));
return Ok(());
}
let e2 = Expr::Alternate(es);
let split = self.empty_split(); let j1 = self.insts.len();
try!(self.compile(e1)); let jmp = self.empty_jump(); let j2 = self.insts.len();
try!(self.compile(e2)); let j3 = self.insts.len();
self.set_split(split, j1, j2); self.set_jump(jmp, j3); }
Expr::Repeat { e, r: Repeater::ZeroOrOne, greedy } => {
let split = self.empty_split();
let j1 = self.insts.len();
try!(self.compile(*e));
let j2 = self.insts.len();
if greedy {
self.set_split(split, j1, j2);
} else {
self.set_split(split, j2, j1);
}
}
Expr::Repeat { e, r: Repeater::ZeroOrMore, greedy } => {
let j1 = self.insts.len();
let split = self.empty_split();
let j2 = self.insts.len();
try!(self.compile(*e));
let jmp = self.empty_jump();
let j3 = self.insts.len();
self.set_jump(jmp, j1);
if greedy {
self.set_split(split, j2, j3);
} else {
self.set_split(split, j3, j2);
}
}
Expr::Repeat { e, r: Repeater::OneOrMore, greedy } => {
let j1 = self.insts.len();
try!(self.compile(*e));
let split = self.empty_split();
let j2 = self.insts.len();
if greedy {
self.set_split(split, j1, j2);
} else {
self.set_split(split, j2, j1);
}
}
Expr::Repeat { e, r: Repeater::Range { min, max: None }, greedy } => {
let e = *e;
for _ in 0..min {
try!(self.compile(e.clone()));
}
try!(self.compile(Expr::Repeat {
e: Box::new(e),
r: Repeater::ZeroOrMore,
greedy: greedy,
}));
}
Expr::Repeat { e, r: Repeater::Range { min, max: Some(max) }, greedy } => {
let e = *e;
for _ in 0..min {
try!(self.compile(e.clone()));
}
for _ in min..max {
try!(self.compile(Expr::Repeat {
e: Box::new(e.clone()),
r: Repeater::ZeroOrOne,
greedy: greedy,
}));
}
}
}
self.check_size()
}
#[inline]
fn push(&mut self, x: Inst) {
self.insts.push(x)
}
#[inline]
fn empty_split(&mut self) -> InstIdx {
self.insts.push(Split(0, 0));
self.insts.len() - 1
}
#[inline]
fn set_split(&mut self, i: InstIdx, pc1: InstIdx, pc2: InstIdx) {
let split = &mut self.insts[i];
match *split {
Split(_, _) => *split = Split(pc1, pc2),
_ => panic!("BUG: Invalid split index."),
}
}
#[inline]
fn empty_jump(&mut self) -> InstIdx {
self.insts.push(Jump(0));
self.insts.len() - 1
}
#[inline]
fn set_jump(&mut self, i: InstIdx, pc: InstIdx) {
let jmp = &mut self.insts[i];
match *jmp {
Jump(_) => *jmp = Jump(pc),
_ => panic!("BUG: Invalid jump index."),
}
}
}