use std::iter::Iterator;
use super::{Region, Regex, SearchOptions, SEARCH_OPTION_NONE};
impl Regex {
    /// Returns the capture groups of the first match found in `text`.
    ///
    /// The whole of `text` is searched with `SEARCH_OPTION_NONE`. When
    /// `search_with_options` reports a match, the returned `Captures` borrows
    /// `text`, owns the filled-in `Region`, and stores the reported position
    /// as its offset. Returns `None` when nothing matches.
    pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
        let mut region = Region::new();
        self.search_with_options(text, 0, text.len(), SEARCH_OPTION_NONE, Some(&mut region))
            .map(|pos| {
                Captures {
                    text: text,
                    region: region,
                    offset: pos,
                }
            })
    }

    /// Returns an iterator yielding the `(start, end)` position of each
    /// successive match in `text`.
    ///
    /// The iterator starts at offset 0 with a fresh `Region` that it reuses
    /// for every search.
    pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> FindMatches<'r, 't> {
        FindMatches {
            regex: self,
            region: Region::new(),
            text: text,
            last_end: 0,
            skip_next_empty: false,
        }
    }

    /// Returns an iterator yielding a `Captures` value for each successive
    /// match in `text`, starting at offset 0.
    pub fn captures_iter<'r, 't>(&'r self, text: &'t str) -> FindCaptures<'r, 't> {
        FindCaptures {
            regex: self,
            text: text,
            last_end: 0,
            skip_next_empty: false,
        }
    }

    /// Returns an iterator over the pieces of `text` that lie between the
    /// matches of this regex.
    ///
    /// Built on top of `find_iter`; the matched delimiters themselves are not
    /// yielded.
    pub fn split<'r, 't>(&'r self, text: &'t str) -> RegexSplits<'r, 't> {
        RegexSplits {
            finder: self.find_iter(text),
            last: 0,
        }
    }

    /// Like `split`, but yields at most `limit` pieces.
    ///
    /// The final piece is whatever remains of `text`, unsplit. A `limit` of 0
    /// yields nothing.
    pub fn splitn<'r, 't>(&'r self, text: &'t str, limit: usize) -> RegexSplitsN<'r, 't> {
        RegexSplitsN {
            splits: self.split(text),
            n: limit,
        }
    }

    /// Runs Oniguruma's `onig_scan` over `to_search`, invoking `callback` for
    /// every match it reports.
    ///
    /// * `to_search` - the text to scan, from its first to its last byte.
    /// * `region` - handed to `onig_scan` as the region to fill in.
    /// * `options` - search options; their raw bits are passed through.
    /// * `callback` - receives the two integers supplied by `onig_scan` and a
    ///   copy of the current region. Returning `true` makes the C callback
    ///   return `0`; returning `false` makes it return `-1`, which per the
    ///   `onig_scan` documentation stops the scan.
    ///
    /// Returns the value produced by `onig_scan` unchanged.
    pub fn scan_with_region<F>(&self,
                               to_search: &str,
                               region: &mut Region,
                               options: SearchOptions,
                               mut callback: F)
                               -> i32
        where F: Fn(i32, i32, &Region) -> bool
    {
        use onig_sys::{onig_scan, OnigRegion};
        use std::mem::transmute;
        use libc::{c_void, c_int};

        // Pointers to the first byte and to one past the last byte of the
        // text; slicing at `len()` yields an empty tail whose pointer is the
        // end of the string.
        let start = to_search.as_ptr();
        let end = to_search[to_search.len()..].as_ptr();

        // C-compatible trampoline: recovers the Rust closure from the user
        // data pointer and forwards the call to it.
        extern "C" fn scan_cb<F>(i: c_int, j: c_int, r: *const OnigRegion, ud: *mut c_void) -> c_int
            where F: Fn(i32, i32, &Region) -> bool
        {
            let region = Region::clone_from_raw(r);
            // SAFETY: `ud` is the `&mut callback as *mut F` passed to
            // `onig_scan` below, and `callback` outlives that call.
            let callback = unsafe { &*(ud as *mut F) };
            if callback(i, j, &region) { 0 } else { -1 }
        }

        // SAFETY: `start`/`end` delimit `to_search`, which is borrowed for
        // the duration of the call, and `callback` lives on this stack frame
        // until `onig_scan` returns.
        // NOTE(review): `transmute(region)` reinterprets `&mut Region` as the
        // raw region pointer `onig_scan` expects, which presumes `Region` is
        // layout-compatible with `OnigRegion` - confirm against `Region`'s
        // definition.
        unsafe {
            onig_scan(self.raw,
                      start,
                      end,
                      transmute(region),
                      options.bits(),
                      scan_cb::<F>,
                      &mut callback as *mut F as *mut c_void)
        }
    }

    /// Scans `to_search` with `SEARCH_OPTION_NONE`, calling `callback` with a
    /// `Captures` value for every match reported.
    ///
    /// The callback receives the first integer supplied by the scanner and a
    /// `Captures` built from a clone of the reported region, whose offset is
    /// the second integer. Its boolean result is forwarded to
    /// `scan_with_region`. The status returned by `scan_with_region` is
    /// discarded.
    pub fn scan<'t, CB>(&self, to_search: &'t str, callback: CB)
        where CB: Fn(i32, Captures<'t>) -> bool
    {
        let mut region = Region::new();
        self.scan_with_region(to_search, &mut region, SEARCH_OPTION_NONE, |n, s, region| {
            let captures = Captures {
                text: to_search,
                region: region.clone(),
                offset: s as usize,
            };
            callback(n, captures)
        });
    }
}
/// The capture groups recorded for a single match.
///
/// Borrows the searched text for `'t` and owns the `Region` holding the
/// group positions.
#[derive(Debug)]
pub struct Captures<'t> {
/// The text that was searched; slices returned by `at` borrow from it.
text: &'t str,
/// Group positions filled in by the search.
region: Region,
/// Position reported by the search or scan call that produced this match.
offset: usize,
}
impl<'t> Captures<'t> {
    /// Looks up the `(start, end)` offsets of capture group `pos` in the
    /// underlying region.
    ///
    /// Returns whatever `Region::pos` returns for that index, i.e. `None`
    /// when the region has no position for the group.
    pub fn pos(&self, pos: usize) -> Option<(usize, usize)> {
        self.region.pos(pos)
    }

    /// Returns the slice of the searched text covered by capture group
    /// `pos`, or `None` when `pos()` yields no position for it.
    pub fn at(&self, pos: usize) -> Option<&'t str> {
        let text = self.text;
        match self.pos(pos) {
            Some((beg, end)) => Some(&text[beg..end]),
            None => None,
        }
    }

    /// Number of groups reported by the underlying region.
    pub fn len(&self) -> usize {
        self.region.len()
    }

    /// `true` when the underlying region reports no groups at all.
    pub fn is_empty(&self) -> bool {
        0 == self.len()
    }

    /// Iterates over the text of every group, starting from group 0.
    /// Each item is the result of `at` for that group.
    pub fn iter(&'t self) -> SubCaptures<'t> {
        SubCaptures {
            caps: self,
            idx: 0,
        }
    }

    /// Iterates over the `(start, end)` position of every group, starting
    /// from group 0. Each item is the result of `pos` for that group.
    pub fn iter_pos(&'t self) -> SubCapturesPos<'t> {
        SubCapturesPos {
            caps: self,
            idx: 0,
        }
    }

    /// The offset stored when this value was created.
    pub fn offset(&self) -> usize {
        self.offset
    }
}
/// Iterator over the text of each capture group of a `Captures` value.
///
/// Yields `Option<&str>` per group: the result of `Captures::at`.
pub struct SubCaptures<'t> {
/// Index of the next group to yield.
idx: usize,
/// The captures being iterated.
caps: &'t Captures<'t>,
}
impl<'t> Iterator for SubCaptures<'t> {
    type Item = Option<&'t str>;

    /// Yields the text of the next capture group (the result of
    /// `Captures::at`), or `None` once every group has been visited.
    fn next(&mut self) -> Option<Option<&'t str>> {
        if self.idx >= self.caps.len() {
            return None;
        }
        let current = self.idx;
        self.idx += 1;
        Some(self.caps.at(current))
    }

    /// Reports exactly how many groups are still to be yielded.
    ///
    /// The hint must describe the *remaining* items, so the groups already
    /// consumed (`idx`) are subtracted from the total.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.caps.len().saturating_sub(self.idx);
        (remaining, Some(remaining))
    }
}
/// Iterator over the position of each capture group of a `Captures` value.
///
/// Yields `Option<(usize, usize)>` per group: the result of `Captures::pos`.
pub struct SubCapturesPos<'t> {
/// Index of the next group to yield.
idx: usize,
/// The captures being iterated.
caps: &'t Captures<'t>,
}
impl<'t> Iterator for SubCapturesPos<'t> {
    type Item = Option<(usize, usize)>;

    /// Yields the position of the next capture group (the result of
    /// `Captures::pos`), or `None` once every group has been visited.
    fn next(&mut self) -> Option<Option<(usize, usize)>> {
        if self.idx >= self.caps.len() {
            return None;
        }
        let current = self.idx;
        self.idx += 1;
        Some(self.caps.pos(current))
    }

    /// Reports exactly how many groups are still to be yielded.
    ///
    /// The hint must describe the *remaining* items, so the groups already
    /// consumed (`idx`) are subtracted from the total.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.caps.len().saturating_sub(self.idx);
        (remaining, Some(remaining))
    }
}
/// Iterator over the `(start, end)` positions of successive matches of a
/// regex in a text. Created by `Regex::find_iter`.
pub struct FindMatches<'r, 't> {
/// The regex being matched.
regex: &'r Regex,
/// Region reused for every search; cleared before each one.
region: Region,
/// The text being searched.
text: &'t str,
/// Offset at which the next search starts.
last_end: usize,
/// Set after a non-empty match; consulted when a later match is empty.
skip_next_empty: bool,
}
impl<'r, 't> Iterator for FindMatches<'r, 't> {
    type Item = (usize, usize);

    /// Returns the `(start, end)` position of the next match, or `None` when
    /// the text is exhausted or no further match exists.
    ///
    /// An empty match is suppressed only when it starts exactly where the
    /// previous non-empty match ended (e.g. the empty `\d*` match right after
    /// a run of digits). Empty matches further along the text are reported.
    fn next(&mut self) -> Option<(usize, usize)> {
        loop {
            // `last_end` moves past `text.len()` once an empty match at the
            // very end of the text has been handled; that terminates iteration.
            if self.last_end > self.text.len() {
                return None;
            }

            let search_start = self.last_end;
            self.region.clear();
            let found = self.regex.search_with_options(self.text,
                                                       search_start,
                                                       self.text.len(),
                                                       SEARCH_OPTION_NONE,
                                                       Some(&mut self.region));
            if found.is_none() {
                return None;
            }

            let (s, e) = self.region.pos(0).unwrap();
            self.last_end = e;

            if e != s {
                self.skip_next_empty = true;
                return Some((s, e));
            }

            // Empty match: step over one character (or one byte past the end
            // of the text) so the next search cannot find it again.
            self.last_end += self.text[e..]
                .chars()
                .next()
                .map(|c| c.len_utf8())
                .unwrap_or(1);

            // Only an empty match glued to the end of the previous non-empty
            // match is a duplicate; anything further along is a real match.
            let follows_previous = self.skip_next_empty && s == search_start;
            self.skip_next_empty = false;
            if !follows_previous {
                return Some((s, e));
            }
        }
    }
}
/// Iterator over the `Captures` of successive matches of a regex in a text.
/// Created by `Regex::captures_iter`.
pub struct FindCaptures<'r, 't> {
/// The regex being matched.
regex: &'r Regex,
/// The text being searched.
text: &'t str,
/// Offset at which the next search starts.
last_end: usize,
/// Set after a non-empty match; consulted when a later match is empty.
skip_next_empty: bool,
}
impl<'r, 't> Iterator for FindCaptures<'r, 't> {
    type Item = Captures<'t>;

    /// Returns the captures of the next match, or `None` when the text is
    /// exhausted or no further match exists.
    ///
    /// The search cursor always moves to the end of the match just found, so
    /// an empty match located ahead of the cursor is reported once rather
    /// than being found again by the following searches. An empty match is
    /// suppressed only when it starts exactly where the previous non-empty
    /// match ended.
    fn next(&mut self) -> Option<Captures<'t>> {
        loop {
            // `last_end` moves past `text.len()` once an empty match at the
            // very end of the text has been handled; that terminates iteration.
            if self.last_end > self.text.len() {
                return None;
            }

            let search_start = self.last_end;
            // A fresh region per match: it is moved into the returned value.
            let mut region = Region::new();
            let found = self.regex.search_with_options(self.text,
                                                       search_start,
                                                       self.text.len(),
                                                       SEARCH_OPTION_NONE,
                                                       Some(&mut region));
            let offset = match found {
                Some(offset) => offset,
                None => return None,
            };

            let (s, e) = region.pos(0).unwrap();
            self.last_end = e;

            if e == s {
                // Empty match: step over one character (or one byte past the
                // end of the text) so the next search cannot find it again.
                self.last_end += self.text[e..]
                    .chars()
                    .next()
                    .map(|c| c.len_utf8())
                    .unwrap_or(1);

                // Only an empty match glued to the end of the previous
                // non-empty match is a duplicate.
                let follows_previous = self.skip_next_empty && s == search_start;
                self.skip_next_empty = false;
                if follows_previous {
                    continue;
                }
            } else {
                self.skip_next_empty = true;
            }

            return Some(Captures {
                text: self.text,
                region: region,
                offset: offset,
            });
        }
    }
}
/// Iterator over the pieces of a text separated by matches of a regex.
/// Created by `Regex::split`.
pub struct RegexSplits<'r, 't> {
/// Match iterator supplying the delimiter positions.
finder: FindMatches<'r, 't>,
/// Offset where the next piece begins (the end of the previous delimiter).
last: usize,
}
impl<'r, 't> Iterator for RegexSplits<'r, 't> {
    type Item = &'t str;

    /// Yields the text between the previous delimiter and the next one.
    ///
    /// Once the delimiter matches run out, the remaining tail of the text is
    /// yielded once, provided it is non-empty; after that the iterator
    /// returns `None`.
    fn next(&mut self) -> Option<&'t str> {
        let haystack = self.finder.text;

        if let Some((start, end)) = self.finder.next() {
            // Piece runs up to the delimiter; resume right after it.
            let piece = &haystack[self.last..start];
            self.last = end;
            return Some(piece);
        }

        // No more delimiters: hand out whatever is left, if anything.
        if self.last >= haystack.len() {
            return None;
        }
        let tail = &haystack[self.last..];
        self.last = haystack.len();
        Some(tail)
    }
}
/// Iterator over at most a fixed number of pieces of a text separated by
/// matches of a regex. Created by `Regex::splitn`.
pub struct RegexSplitsN<'r, 't> {
/// Underlying unbounded split iterator.
splits: RegexSplits<'r, 't>,
/// Number of items this iterator may still yield.
n: usize,
}
impl<'r, 't> Iterator for RegexSplitsN<'r, 't> {
    type Item = &'t str;

    /// Yields the next piece while the budget `n` allows.
    ///
    /// Every call that finds a non-zero budget decrements it. The call that
    /// uses up the last unit returns the rest of the text, unsplit, starting
    /// at the current split position; earlier calls delegate to the
    /// underlying split iterator.
    fn next(&mut self) -> Option<&'t str> {
        match self.n {
            0 => None,
            1 => {
                // Final allowed item: everything that has not been split off.
                self.n = 0;
                let haystack = self.splits.finder.text;
                Some(&haystack[self.splits.last..])
            }
            _ => {
                self.n -= 1;
                self.splits.next()
            }
        }
    }

    /// At most `n` further items can be produced; no lower bound is promised.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let upper = self.n;
        (0, Some(upper))
    }
}
#[cfg(test)]
mod tests {
    use super::super::*;

    /// Group count, positions and text for a pattern with one unmatched group.
    #[test]
    fn test_regex_captures() {
        let re = Regex::new("e(l+)|(r+)").unwrap();
        let caps = re.captures("hello").unwrap();

        assert_eq!(caps.len(), 3);
        assert!(!caps.is_empty());

        // Whole match, first group, and the second group that did not match.
        assert_eq!(caps.pos(0).unwrap(), (1, 4));
        assert_eq!(caps.pos(1).unwrap(), (2, 4));
        assert_eq!(caps.pos(2), None);

        assert_eq!(caps.at(0).unwrap(), "ell");
        assert_eq!(caps.at(1).unwrap(), "ll");
        assert_eq!(caps.at(2), None);
    }

    /// `iter` yields the text of every group in order.
    #[test]
    fn test_regex_subcaptures() {
        let re = Regex::new("e(l+)").unwrap();
        let captures = re.captures("hello").unwrap();
        let groups: Vec<_> = captures.iter().collect();

        assert_eq!(groups[0], Some("ell"));
        assert_eq!(groups[1], Some("ll"));
        assert_eq!(groups.len(), 2);
    }

    /// `iter_pos` yields the position of every group in order.
    #[test]
    fn test_regex_subcapturespos() {
        let re = Regex::new("e(l+)").unwrap();
        let captures = re.captures("hello").unwrap();
        let positions: Vec<_> = captures.iter_pos().collect();

        assert_eq!(positions[0], Some((1, 4)));
        assert_eq!(positions[1], Some((2, 4)));
        assert_eq!(positions.len(), 2);
    }

    /// Plain non-empty matches.
    #[test]
    fn test_find_iter() {
        let re = Regex::new(r"\d+").unwrap();
        let found: Vec<_> = re.find_iter("a12b2").collect();
        assert_eq!(found, vec![(1, 3), (4, 5)]);
    }

    /// A single empty match ahead of non-empty ones.
    #[test]
    fn test_find_iter_one_zero_length() {
        let re = Regex::new(r"\d*").unwrap();
        let found: Vec<_> = re.find_iter("a1b2").collect();
        assert_eq!(found, vec![(0, 0), (1, 2), (3, 4)]);
    }

    /// Several empty matches interleaved with non-empty ones.
    #[test]
    fn test_find_iter_many_zero_length() {
        let re = Regex::new(r"\d*").unwrap();
        let found: Vec<_> = re.find_iter("a1bbb2").collect();
        assert_eq!(found, vec![(0, 0), (1, 2), (3, 3), (4, 4), (5, 6)]);
    }

    /// Empty matches located ahead of the search start are reported once each.
    #[test]
    fn test_zero_length_matches_jumps_past_match_location() {
        let re = Regex::new(r"\b").unwrap();
        let found: Vec<_> = re.find_iter("test string").collect();
        assert_eq!(found, [(0, 0), (4, 4), (5, 5), (11, 11)]);
    }

    /// `captures_iter` visits the same matches as `find_iter`.
    #[test]
    fn test_captures_iter() {
        let re = Regex::new(r"\d+").unwrap();
        let all: Vec<_> = re.captures_iter("a12b2").collect();
        assert_eq!(all[0].pos(0).unwrap(), (1, 3));
        assert_eq!(all[1].pos(0).unwrap(), (4, 5));
    }

    /// The stored offset is the position at which each match was found.
    #[test]
    fn test_captures_stores_match_offset() {
        let re = Regex::new(r"\d+\.(\d+)").unwrap();

        let first = re.captures("100 - 3.1415 / 2.0").unwrap();
        assert_eq!(6, first.offset());

        let offsets: Vec<_> = re.captures_iter("1 - 3234.3 * 123.2 - 100")
            .map(|cap| cap.offset())
            .collect();
        assert_eq!(vec![4, 13], offsets);
    }
}