use alloc::{borrow::Cow, string::String, sync::Arc, vec::Vec};
use regex_automata::{meta, util::captures, Input, PatternID};
use crate::{bytes::RegexBuilder, error::Error};
/// A compiled regular expression whose search methods operate on `&[u8]` haystacks.
///
/// `Clone` is derived, so cloning clones both fields.
#[derive(Clone)]
pub struct Regex {
/// The `regex_automata` meta regex that every search method delegates to.
pub(crate) meta: meta::Regex,
/// The pattern text handed back by `as_str`.
pub(crate) pattern: Arc<str>,
}
impl core::fmt::Display for Regex {
    /// Writes the pattern string returned by `as_str` to the formatter.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // The original nested `write!` formatted the string with default
        // options, so emitting the text directly yields the same output.
        f.write_str(self.as_str())
    }
}
impl core::fmt::Debug for Regex {
    /// Formats the regex as a tuple struct named `Regex` holding the pattern string.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let pattern = self.as_str();
        let mut tuple = f.debug_tuple("Regex");
        tuple.field(&pattern);
        tuple.finish()
    }
}
impl core::str::FromStr for Regex {
type Err = Error;
fn from_str(s: &str) -> Result<Regex, Error> {
Regex::new(s)
}
}
impl TryFrom<&str> for Regex {
type Error = Error;
fn try_from(s: &str) -> Result<Regex, Error> {
Regex::new(s)
}
}
impl TryFrom<String> for Regex {
type Error = Error;
fn try_from(s: String) -> Result<Regex, Error> {
Regex::new(&s)
}
}
impl Regex {
    /// Compiles the pattern `re` by delegating to `RegexBuilder::new(re).build()`.
    ///
    /// # Errors
    ///
    /// Returns the `Error` reported by the builder when it cannot build a regex.
    pub fn new(re: &str) -> Result<Regex, Error> {
        let builder = RegexBuilder::new(re);
        builder.build()
    }

    /// Reports whether the regex matches in `haystack`, searching from offset `0`.
    #[inline]
    pub fn is_match(&self, haystack: &[u8]) -> bool {
        let from_beginning = 0;
        self.is_match_at(haystack, from_beginning)
    }

    /// Returns the match found by `find_at` when searching from offset `0`,
    /// or `None` when there is no match.
    #[inline]
    pub fn find<'h>(&self, haystack: &'h [u8]) -> Option<Match<'h>> {
        let from_beginning = 0;
        self.find_at(haystack, from_beginning)
    }

    /// Returns an iterator over the matches the meta regex yields for `haystack`.
    #[inline]
    pub fn find_iter<'r, 'h>(&'r self, haystack: &'h [u8]) -> Matches<'r, 'h> {
        let it = self.meta.find_iter(haystack);
        Matches { haystack, it }
    }

    /// Returns the capture groups found by `captures_at` when searching from
    /// offset `0`, or `None` when there is no match.
    #[inline]
    pub fn captures<'h>(&self, haystack: &'h [u8]) -> Option<Captures<'h>> {
        let from_beginning = 0;
        self.captures_at(haystack, from_beginning)
    }

    /// Returns an iterator over the capture groups of each match the meta
    /// regex yields for `haystack`.
    #[inline]
    pub fn captures_iter<'r, 'h>(
        &'r self,
        haystack: &'h [u8],
    ) -> CaptureMatches<'r, 'h> {
        let it = self.meta.captures_iter(haystack);
        CaptureMatches { haystack, it }
    }

    /// Returns an iterator over the sub-slices of `haystack` produced by the
    /// meta regex's `split`.
    #[inline]
    pub fn split<'r, 'h>(&'r self, haystack: &'h [u8]) -> Split<'r, 'h> {
        let it = self.meta.split(haystack);
        Split { haystack, it }
    }

    /// Like `split`, but forwards `limit` to the meta regex's `splitn`.
    #[inline]
    pub fn splitn<'r, 'h>(
        &'r self,
        haystack: &'h [u8],
        limit: usize,
    ) -> SplitN<'r, 'h> {
        let it = self.meta.splitn(haystack, limit);
        SplitN { haystack, it }
    }

    /// Replaces at most one match in `haystack` using `rep`.
    ///
    /// Equivalent to `replacen` with a limit of `1`.
    #[inline]
    pub fn replace<'h, R: Replacer>(
        &self,
        haystack: &'h [u8],
        rep: R,
    ) -> Cow<'h, [u8]> {
        let single_replacement = 1;
        self.replacen(haystack, single_replacement, rep)
    }

    /// Replaces every match in `haystack` using `rep`.
    ///
    /// Equivalent to `replacen` with a limit of `0`, which means "no limit".
    #[inline]
    pub fn replace_all<'h, R: Replacer>(
        &self,
        haystack: &'h [u8],
        rep: R,
    ) -> Cow<'h, [u8]> {
        let unlimited = 0;
        self.replacen(haystack, unlimited, rep)
    }

    /// Replaces up to `limit` matches in `haystack` using `rep`; a `limit`
    /// of `0` replaces all matches.
    ///
    /// Returns `Cow::Borrowed(haystack)` when there is no match at all, and
    /// an owned buffer otherwise.
    #[inline]
    pub fn replacen<'h, R: Replacer>(
        &self,
        haystack: &'h [u8],
        limit: usize,
        mut rep: R,
    ) -> Cow<'h, [u8]> {
        // Fast path: the replacer says its bytes can be copied verbatim, so
        // only match spans are needed and capture groups are never resolved.
        if let Some(literal) = rep.no_expansion() {
            let mut matches = self.find_iter(haystack).enumerate().peekable();
            if matches.peek().is_none() {
                return Cow::Borrowed(haystack);
            }
            let mut out = Vec::with_capacity(haystack.len());
            // End of the most recent match; everything before it is already in `out`.
            let mut copied_to = 0;
            for (nth, m) in matches {
                out.extend_from_slice(&haystack[copied_to..m.start()]);
                out.extend_from_slice(&literal);
                copied_to = m.end();
                let limit_reached = limit > 0 && nth >= limit - 1;
                if limit_reached {
                    break;
                }
            }
            out.extend_from_slice(&haystack[copied_to..]);
            return Cow::Owned(out);
        }

        // Slow path: the replacer needs the capture groups of each match.
        let mut all_caps = self.captures_iter(haystack).enumerate().peekable();
        if all_caps.peek().is_none() {
            return Cow::Borrowed(haystack);
        }
        let mut out = Vec::with_capacity(haystack.len());
        let mut copied_to = 0;
        for (nth, caps) in all_caps {
            // Group 0 is unwrapped inside `get_match`, exactly as before.
            let m = caps.get_match();
            out.extend_from_slice(&haystack[copied_to..m.start()]);
            rep.replace_append(&caps, &mut out);
            copied_to = m.end();
            let limit_reached = limit > 0 && nth >= limit - 1;
            if limit_reached {
                break;
            }
        }
        out.extend_from_slice(&haystack[copied_to..]);
        Cow::Owned(out)
    }
}
impl Regex {
    /// Returns the offset reported by `shortest_match_at` when searching
    /// from offset `0`.
    #[inline]
    pub fn shortest_match(&self, haystack: &[u8]) -> Option<usize> {
        let from_beginning = 0;
        self.shortest_match_at(haystack, from_beginning)
    }

    /// Runs a half search over `start..haystack.len()` with `earliest(true)`
    /// set on the input and returns the offset of the half match, if any.
    #[inline]
    pub fn shortest_match_at(
        &self,
        haystack: &[u8],
        start: usize,
    ) -> Option<usize> {
        let span = start..haystack.len();
        let input = Input::new(haystack).earliest(true).span(span);
        let half = self.meta.search_half(&input)?;
        Some(half.offset())
    }

    /// Reports whether the regex matches within `start..haystack.len()`.
    #[inline]
    pub fn is_match_at(&self, haystack: &[u8], start: usize) -> bool {
        let span = start..haystack.len();
        let input = Input::new(haystack).span(span);
        self.meta.is_match(input)
    }

    /// Searches `start..haystack.len()` and returns the match found, if any.
    ///
    /// The offsets in the returned `Match` are those reported by the meta
    /// regex for the full `haystack`.
    #[inline]
    pub fn find_at<'h>(
        &self,
        haystack: &'h [u8],
        start: usize,
    ) -> Option<Match<'h>> {
        let span = start..haystack.len();
        let input = Input::new(haystack).span(span);
        let found = self.meta.find(input)?;
        Some(Match::new(haystack, found.start(), found.end()))
    }

    /// Searches `start..haystack.len()` and returns the capture groups of
    /// the match found, or `None` when nothing matched.
    #[inline]
    pub fn captures_at<'h>(
        &self,
        haystack: &'h [u8],
        start: usize,
    ) -> Option<Captures<'h>> {
        let span = start..haystack.len();
        let input = Input::new(haystack).span(span);
        let mut caps = self.meta.create_captures();
        self.meta.captures(input, &mut caps);
        if !caps.is_match() {
            return None;
        }
        let static_captures_len = self.static_captures_len();
        Some(Captures { haystack, caps, static_captures_len })
    }

    /// Like `captures_read_at`, searching from offset `0`.
    #[inline]
    pub fn captures_read<'h>(
        &self,
        locs: &mut CaptureLocations,
        haystack: &'h [u8],
    ) -> Option<Match<'h>> {
        let from_beginning = 0;
        self.captures_read_at(locs, haystack, from_beginning)
    }

    /// Searches `start..haystack.len()`, writing the capture state into
    /// `locs`, and returns the overall match recorded there, if any.
    #[inline]
    pub fn captures_read_at<'h>(
        &self,
        locs: &mut CaptureLocations,
        haystack: &'h [u8],
        start: usize,
    ) -> Option<Match<'h>> {
        let span = start..haystack.len();
        let input = Input::new(haystack).span(span);
        self.meta.search_captures(&input, &mut locs.0);
        let overall = locs.0.get_match()?;
        Some(Match::new(haystack, overall.start(), overall.end()))
    }

    /// Hidden alias that forwards to `captures_read_at`.
    #[doc(hidden)]
    #[inline]
    pub fn read_captures_at<'h>(
        &self,
        locs: &mut CaptureLocations,
        haystack: &'h [u8],
        start: usize,
    ) -> Option<Match<'h>> {
        Self::captures_read_at(self, locs, haystack, start)
    }
}
impl Regex {
    /// Returns the pattern string stored in this regex.
    #[inline]
    pub fn as_str(&self) -> &str {
        &*self.pattern
    }

    /// Returns an iterator over the capture group names of pattern zero.
    #[inline]
    pub fn capture_names(&self) -> CaptureNames<'_> {
        let names = self.meta.group_info().pattern_names(PatternID::ZERO);
        CaptureNames(names)
    }

    /// Returns the number of capture groups recorded for pattern zero.
    #[inline]
    pub fn captures_len(&self) -> usize {
        let info = self.meta.group_info();
        info.group_len(PatternID::ZERO)
    }

    /// Returns the meta regex's `static_captures_len` value.
    #[inline]
    pub fn static_captures_len(&self) -> Option<usize> {
        let meta = &self.meta;
        meta.static_captures_len()
    }

    /// Creates a fresh `CaptureLocations` backed by newly created captures
    /// from the meta regex.
    #[inline]
    pub fn capture_locations(&self) -> CaptureLocations {
        let caps = self.meta.create_captures();
        CaptureLocations(caps)
    }

    /// Hidden alias that forwards to `capture_locations`.
    #[doc(hidden)]
    #[inline]
    pub fn locations(&self) -> CaptureLocations {
        Self::capture_locations(self)
    }
}
/// A single match: a `start..end` byte span plus the haystack it refers to.
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct Match<'h> {
    haystack: &'h [u8],
    start: usize,
    end: usize,
}

impl<'h> Match<'h> {
    /// Returns the byte offset at which the match begins.
    #[inline]
    pub fn start(&self) -> usize {
        let Match { start, .. } = *self;
        start
    }

    /// Returns the byte offset at which the match ends.
    #[inline]
    pub fn end(&self) -> usize {
        let Match { end, .. } = *self;
        end
    }

    /// Returns `true` when the start and end offsets are equal.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.end == self.start
    }

    /// Returns the number of bytes spanned by the match (`end - start`).
    #[inline]
    pub fn len(&self) -> usize {
        let Match { start, end, .. } = *self;
        end - start
    }

    /// Returns the span of the match as a `start..end` range.
    #[inline]
    pub fn range(&self) -> core::ops::Range<usize> {
        core::ops::Range { start: self.start, end: self.end }
    }

    /// Returns the matched bytes, borrowed from the haystack.
    #[inline]
    pub fn as_bytes(&self) -> &'h [u8] {
        let haystack: &'h [u8] = self.haystack;
        &haystack[self.start..self.end]
    }

    /// Bundles a haystack with a span; no validation is performed here.
    #[inline]
    fn new(haystack: &'h [u8], start: usize, end: usize) -> Match<'h> {
        Self { haystack, start, end }
    }
}
impl<'h> core::fmt::Debug for Match<'h> {
    /// Formats the match as a struct named `Match` with `start`, `end` and
    /// `bytes` fields; the bytes are rendered through `DebugHaystack`.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        use regex_automata::util::escape::DebugHaystack;
        let bytes = DebugHaystack(self.as_bytes());
        f.debug_struct("Match")
            .field("start", &self.start)
            .field("end", &self.end)
            .field("bytes", &bytes)
            .finish()
    }
}
impl<'h> From<Match<'h>> for &'h [u8] {
fn from(m: Match<'h>) -> &'h [u8] {
m.as_bytes()
}
}
impl<'h> From<Match<'h>> for core::ops::Range<usize> {
fn from(m: Match<'h>) -> core::ops::Range<usize> {
m.range()
}
}
/// The capture groups of one match, together with the haystack searched.
pub struct Captures<'h> {
    haystack: &'h [u8],
    caps: captures::Captures,
    static_captures_len: Option<usize>,
}

impl<'h> Captures<'h> {
    /// Returns the match for capture group `i`, or `None` when the
    /// underlying captures report no span for that group.
    #[inline]
    pub fn get(&self, i: usize) -> Option<Match<'h>> {
        let sp = self.caps.get_group(i)?;
        Some(Match::new(self.haystack, sp.start, sp.end))
    }

    /// Returns the match for group `0`.
    ///
    /// # Panics
    ///
    /// Panics if group `0` has no span.
    #[inline]
    pub fn get_match(&self) -> Match<'h> {
        let whole = self.get(0);
        whole.unwrap()
    }

    /// Returns the match for the capture group called `name`, or `None`
    /// when the underlying captures report no span for that name.
    #[inline]
    pub fn name(&self, name: &str) -> Option<Match<'h>> {
        let sp = self.caps.get_group_by_name(name)?;
        Some(Match::new(self.haystack, sp.start, sp.end))
    }

    /// Returns the bytes of the overall match along with an array of the
    /// bytes of `N` capture groups, as produced by `extract_bytes`.
    ///
    /// # Panics
    ///
    /// Panics when the regex has no static capture count, or when `N`
    /// differs from that count minus one.
    pub fn extract<const N: usize>(&self) -> (&'h [u8], [&'h [u8]; N]) {
        let static_len = self
            .static_captures_len
            .expect("number of capture groups can vary in a match");
        // The static count is reduced by one before being compared with `N`.
        let len = static_len
            .checked_sub(1)
            .expect("number of groups is always greater than zero");
        assert_eq!(N, len, "asked for {N} groups, but must ask for {len}");
        self.caps.extract_bytes(self.haystack)
    }

    /// Interpolates `replacement` against these captures, appending the
    /// result to `dst`.
    #[inline]
    pub fn expand(&self, replacement: &[u8], dst: &mut Vec<u8>) {
        let haystack = self.haystack;
        self.caps.interpolate_bytes_into(haystack, replacement, dst);
    }

    /// Returns an iterator over every capture group, in the order yielded
    /// by the underlying captures.
    #[inline]
    pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 'h> {
        let it = self.caps.iter();
        SubCaptureMatches { haystack: self.haystack, it }
    }

    /// Returns the group count reported by the underlying captures.
    #[inline]
    pub fn len(&self) -> usize {
        let caps = &self.caps;
        caps.group_len()
    }
}
impl<'h> core::fmt::Debug for Captures<'h> {
    /// Formats the captures as `Captures(..)` wrapping a map from group
    /// index (and name, when present) to the group's span and bytes.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        /// Map key: the group index, followed by `/name` for named groups.
        struct Key<'a>(usize, Option<&'a str>);

        impl<'a> core::fmt::Debug for Key<'a> {
            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                write!(f, "{}", self.0)?;
                match self.1 {
                    Some(name) => write!(f, "/{name:?}"),
                    None => Ok(()),
                }
            }
        }

        /// Map value: `start..end/bytes` for a group that has a span.
        struct Value<'a>(Match<'a>);

        impl<'a> core::fmt::Debug for Value<'a> {
            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                use regex_automata::util::escape::DebugHaystack;
                let mat = self.0;
                let bytes = DebugHaystack(mat.as_bytes());
                write!(f, "{}..{}/{:?}", mat.start(), mat.end(), bytes)
            }
        }

        /// Renders every group of pattern zero as one map entry.
        struct CapturesDebugMap<'a> {
            caps: &'a Captures<'a>,
        }

        impl<'a> core::fmt::Debug for CapturesDebugMap<'a> {
            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                let captures = self.caps;
                let names =
                    captures.caps.group_info().pattern_names(PatternID::ZERO);
                let mut map = f.debug_map();
                for (group_index, maybe_name) in names.enumerate() {
                    let key = Key(group_index, maybe_name);
                    // Groups without a span are shown as `None`.
                    if let Some(mat) = captures.get(group_index) {
                        map.entry(&key, &Value(mat));
                    } else {
                        map.entry(&key, &None::<()>);
                    }
                }
                map.finish()
            }
        }

        let groups = CapturesDebugMap { caps: self };
        let mut tuple = f.debug_tuple("Captures");
        tuple.field(&groups);
        tuple.finish()
    }
}
impl<'h> core::ops::Index<usize> for Captures<'h> {
    type Output = [u8];

    /// Returns the bytes of capture group `i`.
    ///
    /// # Panics
    ///
    /// Panics when `get(i)` returns `None`.
    fn index<'a>(&'a self, i: usize) -> &'a [u8] {
        match self.get(i) {
            Some(m) => m.as_bytes(),
            None => panic!("no group at index '{i}'"),
        }
    }
}
impl<'h, 'n> core::ops::Index<&'n str> for Captures<'h> {
    type Output = [u8];

    /// Returns the bytes of the capture group called `name`.
    ///
    /// # Panics
    ///
    /// Panics when `self.name(name)` returns `None`.
    fn index<'a>(&'a self, name: &'n str) -> &'a [u8] {
        match self.name(name) {
            Some(m) => m.as_bytes(),
            None => panic!("no group named '{name}'"),
        }
    }
}
/// Capture group offsets, stored as a `regex_automata` captures value.
#[derive(Clone, Debug)]
pub struct CaptureLocations(captures::Captures);

/// Hidden alias for `CaptureLocations`.
#[doc(hidden)]
pub type Locations = CaptureLocations;

impl CaptureLocations {
    /// Returns the `(start, end)` offsets of capture group `i`, or `None`
    /// when the underlying captures report no span for that group.
    #[inline]
    pub fn get(&self, i: usize) -> Option<(usize, usize)> {
        let sp = self.0.get_group(i)?;
        Some((sp.start, sp.end))
    }

    /// Returns the number of groups that the group info records for
    /// pattern zero.
    #[inline]
    pub fn len(&self) -> usize {
        let info = self.0.group_info();
        info.group_len(PatternID::ZERO)
    }

    /// Hidden alias that forwards to `get`.
    #[doc(hidden)]
    #[inline]
    pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
        Self::get(self, i)
    }
}
/// Iterator over the matches of a regex in a haystack, as returned by
/// `Regex::find_iter`.
#[derive(Debug)]
pub struct Matches<'r, 'h> {
    haystack: &'h [u8],
    it: meta::FindMatches<'r, 'h>,
}

impl<'r, 'h> Iterator for Matches<'r, 'h> {
    type Item = Match<'h>;

    /// Advances the inner iterator and wraps its span in a `Match`.
    #[inline]
    fn next(&mut self) -> Option<Match<'h>> {
        let sp = self.it.next()?;
        Some(Match::new(self.haystack, sp.start(), sp.end()))
    }

    /// Delegates counting to the inner iterator's own `count`.
    #[inline]
    fn count(self) -> usize {
        let Matches { it, .. } = self;
        it.count()
    }
}

impl<'r, 'h> core::iter::FusedIterator for Matches<'r, 'h> {}
/// Iterator over the capture groups of each match, as returned by
/// `Regex::captures_iter`.
#[derive(Debug)]
pub struct CaptureMatches<'r, 'h> {
    haystack: &'h [u8],
    it: meta::CapturesMatches<'r, 'h>,
}

impl<'r, 'h> Iterator for CaptureMatches<'r, 'h> {
    type Item = Captures<'h>;

    /// Advances the inner iterator and wraps its captures, together with
    /// the regex's static capture count, in a `Captures`.
    #[inline]
    fn next(&mut self) -> Option<Captures<'h>> {
        let static_captures_len = self.it.regex().static_captures_len();
        let caps = self.it.next()?;
        Some(Captures { haystack: self.haystack, caps, static_captures_len })
    }

    /// Delegates counting to the inner iterator's own `count`.
    #[inline]
    fn count(self) -> usize {
        let CaptureMatches { it, .. } = self;
        it.count()
    }
}

impl<'r, 'h> core::iter::FusedIterator for CaptureMatches<'r, 'h> {}
/// Iterator over the pieces of a haystack, as returned by `Regex::split`.
#[derive(Debug)]
pub struct Split<'r, 'h> {
    haystack: &'h [u8],
    it: meta::Split<'r, 'h>,
}

impl<'r, 'h> Iterator for Split<'r, 'h> {
    type Item = &'h [u8];

    /// Advances the inner iterator and slices the haystack with its span.
    #[inline]
    fn next(&mut self) -> Option<&'h [u8]> {
        let span = self.it.next()?;
        let haystack = self.haystack;
        Some(&haystack[span])
    }
}

impl<'r, 'h> core::iter::FusedIterator for Split<'r, 'h> {}
/// Iterator over the pieces of a haystack, as returned by `Regex::splitn`.
#[derive(Debug)]
pub struct SplitN<'r, 'h> {
    haystack: &'h [u8],
    it: meta::SplitN<'r, 'h>,
}

impl<'r, 'h> Iterator for SplitN<'r, 'h> {
    type Item = &'h [u8];

    /// Advances the inner iterator and slices the haystack with its span.
    #[inline]
    fn next(&mut self) -> Option<&'h [u8]> {
        let span = self.it.next()?;
        let haystack = self.haystack;
        Some(&haystack[span])
    }

    /// Forwards the inner iterator's size hint unchanged.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let SplitN { it, .. } = self;
        it.size_hint()
    }
}

impl<'r, 'h> core::iter::FusedIterator for SplitN<'r, 'h> {}
/// Iterator over capture group names, as returned by `Regex::capture_names`.
///
/// Each item is `Some(name)` or `None`, exactly as yielded by the wrapped
/// `GroupInfoPatternNames` iterator.
#[derive(Clone, Debug)]
pub struct CaptureNames<'r>(captures::GroupInfoPatternNames<'r>);

impl<'r> Iterator for CaptureNames<'r> {
    type Item = Option<&'r str>;

    /// Yields the next item of the wrapped iterator.
    #[inline]
    fn next(&mut self) -> Option<Option<&'r str>> {
        let CaptureNames(names) = self;
        names.next()
    }

    /// Forwards the wrapped iterator's size hint unchanged.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let CaptureNames(names) = self;
        names.size_hint()
    }

    /// Delegates counting to the wrapped iterator.
    #[inline]
    fn count(self) -> usize {
        let CaptureNames(names) = self;
        names.count()
    }
}

impl<'r> ExactSizeIterator for CaptureNames<'r> {}

impl<'r> core::iter::FusedIterator for CaptureNames<'r> {}
/// Iterator over every capture group of one match, as returned by
/// `Captures::iter`.
///
/// Each item is `Some(Match)` when the inner iterator yields a span for the
/// group and `None` otherwise.
#[derive(Clone, Debug)]
pub struct SubCaptureMatches<'c, 'h> {
    haystack: &'h [u8],
    it: captures::CapturesPatternIter<'c>,
}

impl<'c, 'h> Iterator for SubCaptureMatches<'c, 'h> {
    type Item = Option<Match<'h>>;

    /// Advances the inner iterator and converts a present span to a `Match`.
    #[inline]
    fn next(&mut self) -> Option<Option<Match<'h>>> {
        let group = self.it.next()?;
        let haystack = self.haystack;
        Some(group.map(|sp| Match::new(haystack, sp.start, sp.end)))
    }

    /// Forwards the inner iterator's size hint unchanged.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let SubCaptureMatches { it, .. } = self;
        it.size_hint()
    }

    /// Delegates counting to the inner iterator.
    #[inline]
    fn count(self) -> usize {
        let SubCaptureMatches { it, .. } = self;
        it.count()
    }
}

impl<'c, 'h> ExactSizeIterator for SubCaptureMatches<'c, 'h> {}

impl<'c, 'h> core::iter::FusedIterator for SubCaptureMatches<'c, 'h> {}
/// A source of replacement bytes for `Regex::replace`, `Regex::replace_all`
/// and `Regex::replacen`.
pub trait Replacer {
/// Appends the replacement for the match described by `caps` to `dst`.
fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>);
/// Returns replacement bytes that can be copied verbatim for every match,
/// or `None` when no such fixed replacement is available.
///
/// `Regex::replacen` takes its fast path, which never resolves capture
/// groups, when this returns `Some`. The default implementation returns
/// `None`.
fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> {
None
}
/// Wraps a mutable borrow of this replacer in a `ReplacerRef`, which itself
/// implements `Replacer` by forwarding to the borrowed value.
fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> {
ReplacerRef(self)
}
}
impl<'a, const N: usize> Replacer for &'a [u8; N] {
    /// Expands the array's bytes as a replacement template against `caps`.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
        let template: &[u8] = *self;
        caps.expand(template, dst);
    }

    /// Returns the bytes verbatim when they contain no `$`, else `None`.
    fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
        no_expansion(&*self)
    }
}
impl<const N: usize> Replacer for [u8; N] {
    /// Expands the array's bytes as a replacement template against `caps`.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
        let template: &[u8] = &self[..];
        caps.expand(template, dst);
    }

    /// Returns the bytes verbatim when they contain no `$`, else `None`.
    fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
        no_expansion(&*self)
    }
}
impl<'a> Replacer for &'a [u8] {
    /// Expands the slice as a replacement template against `caps`.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
        let template: &[u8] = *self;
        caps.expand(template, dst);
    }

    /// Returns the bytes verbatim when they contain no `$`, else `None`.
    fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
        no_expansion(&*self)
    }
}
impl<'a> Replacer for &'a Vec<u8> {
    /// Expands the vector's bytes as a replacement template against `caps`.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
        let template: &[u8] = self.as_slice();
        caps.expand(template, dst);
    }

    /// Returns the bytes verbatim when they contain no `$`, else `None`.
    fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
        no_expansion(&*self)
    }
}
impl Replacer for Vec<u8> {
    /// Expands the vector's bytes as a replacement template against `caps`.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
        let template: &[u8] = self.as_slice();
        caps.expand(template, dst);
    }

    /// Returns the bytes verbatim when they contain no `$`, else `None`.
    fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
        no_expansion(&*self)
    }
}
impl<'a> Replacer for Cow<'a, [u8]> {
    /// Expands the `Cow`'s bytes as a replacement template against `caps`.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
        let template: &[u8] = &**self;
        caps.expand(template, dst);
    }

    /// Returns the bytes verbatim when they contain no `$`, else `None`.
    fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
        no_expansion(&*self)
    }
}
impl<'a> Replacer for &'a Cow<'a, [u8]> {
    /// Expands the `Cow`'s bytes as a replacement template against `caps`.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
        let cow: &Cow<'a, [u8]> = *self;
        let template: &[u8] = &**cow;
        caps.expand(template, dst);
    }

    /// Returns the bytes verbatim when they contain no `$`, else `None`.
    fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
        no_expansion(&*self)
    }
}
impl<F, T> Replacer for F
where
    F: FnMut(&Captures<'_>) -> T,
    T: AsRef<[u8]>,
{
    /// Calls the closure with `caps` and appends the bytes of its result
    /// to `dst` without any further interpolation.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
        let produced = (*self)(caps);
        dst.extend_from_slice(produced.as_ref());
    }
}
/// A `Replacer` that forwards to a mutably borrowed replacer; created by
/// `Replacer::by_ref`.
#[derive(Debug)]
pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R);

impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> {
    /// Forwards to the borrowed replacer's `replace_append`.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) {
        let inner: &mut R = &mut *self.0;
        inner.replace_append(caps, dst)
    }

    /// Forwards to the borrowed replacer's `no_expansion`.
    fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> {
        let inner: &'r mut R = &mut *self.0;
        inner.no_expansion()
    }
}
/// A replacer whose bytes are always inserted literally, with no capture
/// group interpolation.
#[derive(Clone, Debug)]
pub struct NoExpand<'s>(pub &'s [u8]);

impl<'s> Replacer for NoExpand<'s> {
    /// Appends the wrapped bytes to `dst`, ignoring the captures.
    fn replace_append(&mut self, _: &Captures<'_>, dst: &mut Vec<u8>) {
        let literal: &[u8] = self.0;
        dst.extend_from_slice(literal);
    }

    /// Always returns the wrapped bytes, borrowed.
    fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> {
        let literal: &[u8] = self.0;
        Some(Cow::Borrowed(literal))
    }
}
/// Returns the replacement bytes borrowed when they contain no `$` byte,
/// and `None` as soon as a `$` is present.
fn no_expansion<T: AsRef<[u8]>>(replacement: &T) -> Option<Cow<'_, [u8]>> {
    let bytes = replacement.as_ref();
    if crate::find_byte::find_byte(b'$', bytes).is_some() {
        return None;
    }
    Some(Cow::Borrowed(bytes))
}
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::format;

    /// Accessors of a non-empty match report its span and bytes.
    #[test]
    fn test_match_properties() {
        let haystack = b"Hello, world!";
        let m = Match::new(haystack, 7, 12);
        assert_eq!(m.start(), 7);
        assert_eq!(m.end(), 12);
        assert!(!m.is_empty());
        assert_eq!(m.len(), 5);
        assert_eq!(m.as_bytes(), b"world");
    }

    /// A zero-width match on an empty haystack is empty and has length zero.
    #[test]
    fn test_empty_match() {
        let haystack = b"";
        let m = Match::new(haystack, 0, 0);
        assert!(m.is_empty());
        assert_eq!(m.len(), 0);
    }

    /// Valid UTF-8 bytes are shown as text in the `Debug` output.
    #[test]
    fn test_debug_output_valid_utf8() {
        let haystack = b"Hello, world!";
        let m = Match::new(haystack, 7, 12);
        let debug_str = format!("{m:?}");
        assert_eq!(
            debug_str,
            r#"Match { start: 7, end: 12, bytes: "world" }"#
        );
    }

    /// Invalid UTF-8 bytes are shown as `\x..` escapes in the `Debug` output.
    #[test]
    fn test_debug_output_invalid_utf8() {
        let haystack = b"Hello, \xFFworld!";
        let m = Match::new(haystack, 7, 13);
        let debug_str = format!("{m:?}");
        assert_eq!(
            debug_str,
            r#"Match { start: 7, end: 13, bytes: "\xffworld" }"#
        );
    }

    /// Multi-byte scalar values (emoji, Hangul, Arabic) are printed as-is.
    ///
    /// The haystack is exactly 62 bytes of UTF-8, which is what the expected
    /// `end` offset relies on.
    #[test]
    fn test_debug_output_various_unicode() {
        let haystack =
            "Hello, 😊 world! 안녕하세요? مرحبا بالعالم!".as_bytes();
        let m = Match::new(haystack, 0, haystack.len());
        let debug_str = format!("{m:?}");
        assert_eq!(
            debug_str,
            r#"Match { start: 0, end: 62, bytes: "Hello, 😊 world! 안녕하세요? مرحبا بالعالم!" }"#
        );
    }

    /// ASCII control characters are escaped in the `Debug` output.
    #[test]
    fn test_debug_output_ascii_escape() {
        let haystack = b"Hello,\tworld!\nThis is a \x1b[31mtest\x1b[0m.";
        let m = Match::new(haystack, 0, haystack.len());
        let debug_str = format!("{m:?}");
        assert_eq!(
            debug_str,
            r#"Match { start: 0, end: 38, bytes: "Hello,\tworld!\nThis is a \u{1b}[31mtest\u{1b}[0m." }"#
        );
    }

    /// Only the bytes inside the match span appear in the `Debug` output.
    #[test]
    fn test_debug_output_match_in_middle() {
        let haystack = b"The quick brown fox jumps over the lazy dog.";
        let m = Match::new(haystack, 16, 19);
        let debug_str = format!("{m:?}");
        assert_eq!(debug_str, r#"Match { start: 16, end: 19, bytes: "fox" }"#);
    }
}