use std::collections::HashSet;
use std::iter;
use syntax::{Expr, Repeater, CharClass, ClassRange};
use Error;
use inst::{
EmptyLook,
Inst, InstIdx,
InstSave, InstSplit, InstEmptyLook, InstChar, InstRanges,
};
pub type Compiled = (Vec<Inst>, Vec<Option<String>>);
type CompileResult = Result<Hole, Error>;
pub struct Compiler {
size_limit: usize,
insts: Vec<MaybeInst>,
cap_names: Vec<Option<String>>,
seen_caps: HashSet<usize>,
}
impl Compiler {
pub fn new(size_limit: usize) -> Compiler {
Compiler {
size_limit: size_limit,
insts: vec![],
cap_names: vec![None],
seen_caps: HashSet::new(),
}
}
pub fn compile(mut self, expr: &Expr) -> Result<Compiled, Error> {
let hole = try!(self.c_capture(0, expr));
self.fill_to_next(hole);
self.push_compiled(Inst::Match);
let insts = self.insts.into_iter().map(|inst| inst.unwrap()).collect();
Ok((insts, self.cap_names))
}
fn c(&mut self, expr: &Expr) -> CompileResult {
use inst;
use syntax::Expr::*;
try!(self.check_size());
match *expr {
Empty => Ok(Hole::None),
Literal { ref chars, casei } => self.c_literal(chars, casei),
AnyChar => self.c_class(Some(('\x00', '\u{10ffff}'))),
AnyCharNoNL => {
let ranges = &[('\x00', '\x09'), ('\x0b', '\u{10ffff}')];
self.c_class(ranges.iter().cloned())
}
Class(ref cls) => {
let ranges = cls.iter().map(|c| (c.start, c.end));
self.c_class(ranges)
}
StartLine => self.c_empty_look(inst::EmptyLook::StartLine),
EndLine => self.c_empty_look(inst::EmptyLook::EndLine),
StartText => self.c_empty_look(inst::EmptyLook::StartText),
EndText => self.c_empty_look(inst::EmptyLook::EndText),
WordBoundary => self.c_empty_look(inst::EmptyLook::WordBoundary),
NotWordBoundary => {
self.c_empty_look(inst::EmptyLook::NotWordBoundary)
}
Group { ref e, i: None, name: None } => self.c(e),
Group { ref e, i, ref name } => {
let i = i.expect("capture index");
if !self.seen_caps.contains(&i) {
self.cap_names.push(name.clone());
self.seen_caps.insert(i);
}
self.c_capture(2 * i, e)
}
Concat(ref es) => self.c_concat(es.iter()),
Alternate(ref es) => self.c_alternate(&**es),
Repeat { ref e, r, greedy } => self.c_repeat(e, r, greedy),
}
}
fn c_capture(&mut self, first_slot: usize, expr: &Expr) -> CompileResult {
let hole = self.push_hole(MaybeInst::Save { slot: first_slot });
self.fill_to_next(hole);
let hole = try!(self.c(expr));
self.fill_to_next(hole);
Ok(self.push_hole(MaybeInst::Save { slot: first_slot + 1 }))
}
fn c_literal(&mut self, chars: &[char], casei: bool) -> CompileResult {
assert!(!chars.is_empty());
if casei {
let mut prev_hole = Hole::None;
for &c in chars {
self.fill_to_next(prev_hole);
let class = CharClass::new(vec![
ClassRange { start: c, end: c },
]);
prev_hole = try!(self.c(&Expr::Class(class.case_fold())));
}
Ok(prev_hole)
} else {
let mut prev_hole = Hole::None;
for &c in chars {
self.fill_to_next(prev_hole);
prev_hole = self.push_hole(MaybeInst::Char { c: c });
}
Ok(prev_hole)
}
}
fn c_class<I>(&mut self, ranges: I) -> CompileResult
where I: IntoIterator<Item=(char, char)> {
let ranges: Vec<(char, char)> = ranges.into_iter().collect();
Ok(if ranges.len() == 1 && ranges[0].0 == ranges[0].1 {
self.push_hole(MaybeInst::Char { c: ranges[0].0 })
} else {
self.push_hole(MaybeInst::Ranges { ranges: ranges })
})
}
fn c_empty_look(&mut self, look: EmptyLook) -> CompileResult {
Ok(self.push_hole(MaybeInst::EmptyLook { look: look }))
}
fn c_concat<'a, I>(&mut self, exprs: I) -> CompileResult
where I: IntoIterator<Item=&'a Expr> {
let mut prev_hole = Hole::None;
for e in exprs {
self.fill_to_next(prev_hole);
prev_hole = try!(self.c(e));
}
Ok(prev_hole)
}
fn c_alternate(&mut self, exprs: &[Expr]) -> CompileResult {
assert!(exprs.len() >= 2, "alternates must have at least 2 exprs");
let mut holes = vec![];
for e in &exprs[0..exprs.len() - 1] {
let split = self.push_split_hole();
let goto1 = self.insts.len();
holes.push(try!(self.c(e)));
let goto2 = self.insts.len();
self.fill_split(split, Some(goto1), Some(goto2));
}
holes.push(try!(self.c(&exprs[exprs.len() - 1])));
Ok(Hole::Many(holes))
}
fn c_repeat(
&mut self,
expr: &Expr,
kind: Repeater,
greedy: bool,
) -> CompileResult {
match kind {
Repeater::ZeroOrOne => self.c_repeat_zero_or_one(expr, greedy),
Repeater::ZeroOrMore => self.c_repeat_zero_or_more(expr, greedy),
Repeater::OneOrMore => self.c_repeat_one_or_more(expr, greedy),
Repeater::Range { min, max: None } => {
self.c_repeat_range_min_or_more(expr, greedy, min)
}
Repeater::Range { min, max: Some(max) } => {
self.c_repeat_range(expr, greedy, min, max)
}
}
}
fn c_repeat_zero_or_one(
&mut self,
expr: &Expr,
greedy: bool,
) -> CompileResult {
let split = self.push_split_hole();
let goto1 = self.insts.len();
let hole1 = try!(self.c(expr));
let hole2 = if greedy {
self.fill_split(split, Some(goto1), None)
} else {
self.fill_split(split, None, Some(goto1))
};
Ok(Hole::Many(vec![hole1, hole2]))
}
fn c_repeat_zero_or_more(
&mut self,
expr: &Expr,
greedy: bool,
) -> CompileResult {
let goto_split = self.insts.len();
let split = self.push_split_hole();
let goto_rep_expr = self.insts.len();
let hole_rep_expr = try!(self.c(expr));
self.fill(hole_rep_expr, goto_split);
Ok(if greedy {
self.fill_split(split, Some(goto_rep_expr), None)
} else {
self.fill_split(split, None, Some(goto_rep_expr))
})
}
fn c_repeat_one_or_more(
&mut self,
expr: &Expr,
greedy: bool,
) -> CompileResult {
let goto_rep_expr = self.insts.len();
let hole_rep_expr = try!(self.c(expr));
self.fill_to_next(hole_rep_expr);
let split = self.push_split_hole();
Ok(if greedy {
self.fill_split(split, Some(goto_rep_expr), None)
} else {
self.fill_split(split, None, Some(goto_rep_expr))
})
}
fn c_repeat_range_min_or_more(
&mut self,
expr: &Expr,
greedy: bool,
min: u32,
) -> CompileResult {
let min = u32_to_usize(min);
if min == 0 {
return self.c_repeat_zero_or_more(expr, greedy);
}
let hole = try!(self.c_concat(iter::repeat(expr).take(min - 1)));
self.fill_to_next(hole);
self.c_repeat_one_or_more(expr, greedy)
}
fn c_repeat_range(
&mut self,
expr: &Expr,
greedy: bool,
min: u32,
max: u32,
) -> CompileResult {
let (min, max) = (u32_to_usize(min), u32_to_usize(max));
let hole = try!(self.c_concat(iter::repeat(expr).take(min)));
if min == max {
return Ok(hole);
}
self.fill_to_next(hole);
let mut holes = vec![];
let mut prev_hole = Hole::None;
for _ in min..max {
self.fill_to_next(prev_hole);
let split = self.push_split_hole();
let goto_rep_expr = self.insts.len();
prev_hole = try!(self.c(expr));
if greedy {
holes.push(self.fill_split(split, Some(goto_rep_expr), None));
} else {
holes.push(self.fill_split(split, None, Some(goto_rep_expr)));
}
}
holes.push(prev_hole);
Ok(Hole::Many(holes))
}
fn fill(&mut self, hole: Hole, goto: InstIdx) {
match hole {
Hole::None => {}
Hole::One(pc) => {
self.insts[pc].complete(goto);
}
Hole::Many(holes) => {
for hole in holes {
self.fill(hole, goto);
}
}
}
}
fn fill_to_next(&mut self, hole: Hole) {
let next = self.insts.len();
self.fill(hole, next);
}
fn fill_split(
&mut self,
hole: Hole,
goto1: Option<InstIdx>,
goto2: Option<InstIdx>,
) -> Hole {
match hole {
Hole::None => Hole::None,
Hole::One(pc) => {
match (goto1, goto2) {
(Some(goto1), Some(goto2)) => {
self.insts[pc].complete_split(goto1, goto2);
Hole::None
}
(Some(goto1), None) => {
self.insts[pc].complete_split_goto1(goto1);
Hole::One(pc)
}
(None, Some(goto2)) => {
self.insts[pc].complete_split_goto2(goto2);
Hole::One(pc)
}
(None, None) => unreachable!("at least one of the split \
holes must be filled"),
}
}
Hole::Many(holes) => {
let mut new_holes = vec![];
for hole in holes {
new_holes.push(self.fill_split(hole, goto1, goto2));
}
if new_holes.is_empty() {
Hole::None
} else if new_holes.len() == 1 {
new_holes.pop().unwrap()
} else {
Hole::Many(new_holes)
}
}
}
}
fn push_compiled(&mut self, inst: Inst) {
self.insts.push(MaybeInst::Compiled(inst));
}
fn push_hole(&mut self, inst: MaybeInst) -> Hole {
let hole = self.insts.len();
self.insts.push(inst);
Hole::One(hole)
}
fn push_split_hole(&mut self) -> Hole {
let hole = self.insts.len();
self.insts.push(MaybeInst::Split);
Hole::One(hole)
}
fn check_size(&self) -> Result<(), Error> {
use std::mem::size_of;
if self.insts.len() * size_of::<Inst>() > self.size_limit {
Err(Error::CompiledTooBig(self.size_limit))
} else {
Ok(())
}
}
}
#[derive(Debug)]
enum Hole {
None,
One(InstIdx),
Many(Vec<Hole>),
}
#[derive(Clone, Debug)]
enum MaybeInst {
Compiled(Inst),
Split,
Split1(InstIdx),
Split2(InstIdx),
Save { slot: usize },
EmptyLook { look: EmptyLook },
Char { c: char },
Ranges { ranges: Vec<(char, char)> },
}
impl MaybeInst {
fn complete(&mut self, goto: InstIdx) {
let filled = match *self {
MaybeInst::Save { slot } => Inst::Save(InstSave {
goto: goto,
slot: slot,
}),
MaybeInst::EmptyLook { look } => Inst::EmptyLook(InstEmptyLook {
goto: goto,
look: look,
}),
MaybeInst::Char { c } => Inst::Char(InstChar {
goto: goto,
c: c,
}),
MaybeInst::Ranges { ref ranges } => Inst::Ranges(InstRanges {
goto: goto,
ranges: ranges.clone(),
}),
MaybeInst::Split1(goto1) => {
Inst::Split(InstSplit { goto1: goto1, goto2: goto })
}
MaybeInst::Split2(goto2) => {
Inst::Split(InstSplit { goto1: goto, goto2: goto2 })
}
_ => unreachable!("must be called on an uncompiled instruction \
with exactly one missing goto field, \
instead it was called on: {:?}", self),
};
*self = MaybeInst::Compiled(filled);
}
fn complete_split(&mut self, goto1: InstIdx, goto2: InstIdx) {
let filled = match *self {
MaybeInst::Split => {
Inst::Split(InstSplit { goto1: goto1, goto2: goto2 })
}
_ => unreachable!("must be called on Split instruction, \
instead it was called on: {:?}", self),
};
*self = MaybeInst::Compiled(filled);
}
fn complete_split_goto1(&mut self, goto1: InstIdx) {
let half_filled = match *self {
MaybeInst::Split => goto1,
_ => unreachable!("must be called on Split instruction, \
instead it was called on: {:?}", self),
};
*self = MaybeInst::Split1(half_filled);
}
fn complete_split_goto2(&mut self, goto2: InstIdx) {
let half_filled = match *self {
MaybeInst::Split => goto2,
_ => unreachable!("must be called on Split instruction, \
instead it was called on: {:?}", self),
};
*self = MaybeInst::Split2(half_filled);
}
fn unwrap(self) -> Inst {
match self {
MaybeInst::Compiled(inst) => inst,
_ => unreachable!("must be called on a compiled instruction, \
instead it was called on: {:?}", self),
}
}
}
fn u32_to_usize(n: u32) -> usize {
if (n as u64) > (::std::usize::MAX as u64) {
panic!("BUG: {} is too big to be pointer sized", n)
}
n as usize
}