use core::{iter, slice, str};
#[cfg(all(feature = "alloc", feature = "unicode"))]
use alloc::vec;
#[cfg(feature = "alloc")]
use alloc::{borrow::Cow, string::String, vec::Vec};
#[cfg(feature = "std")]
use std::{ffi::OsStr, path::Path};
use memchr::{memchr, memmem, memrchr};
use crate::escape_bytes::EscapeBytes;
#[cfg(feature = "alloc")]
use crate::ext_vec::ByteVec;
#[cfg(feature = "unicode")]
use crate::unicode::{
whitespace_len_fwd, whitespace_len_rev, GraphemeIndices, Graphemes,
SentenceIndices, Sentences, WordIndices, Words, WordsWithBreakIndices,
WordsWithBreaks,
};
use crate::{
ascii,
bstr::BStr,
byteset,
utf8::{self, CharIndices, Chars, Utf8Chunks, Utf8Error},
};
/// Views any value that implements `AsRef<[u8]>` as a plain `&[u8]`.
///
/// Useful for turning byte-string literals (which have the array type
/// `&[u8; N]`), `&str` values or vectors into byte slices without writing an
/// explicit cast.
#[allow(non_snake_case)]
#[inline]
pub fn B<B: ?Sized + AsRef<[u8]>>(bytes: &B) -> &[u8] {
    <B as AsRef<[u8]>>::as_ref(bytes)
}
/// `ByteSlice` for plain byte slices: both accessors hand back `self` unchanged.
impl ByteSlice for [u8] {
#[inline]
fn as_bytes(&self) -> &[u8] {
self
}
#[inline]
fn as_bytes_mut(&mut self) -> &mut [u8] {
self
}
}
/// `ByteSlice` for fixed-size byte arrays of any length `N`: both accessors
/// return the array coerced to a slice.
impl<const N: usize> ByteSlice for [u8; N] {
#[inline]
fn as_bytes(&self) -> &[u8] {
self
}
#[inline]
fn as_bytes_mut(&mut self) -> &mut [u8] {
self
}
}
/// Private home of the sealing trait used as the supertrait of `ByteSlice`.
mod private {
/// Marker trait. It is `pub` but lives in a non-public module, so it cannot
/// be named outside this module's parent, which limits who can add
/// `ByteSlice` implementations.
pub trait Sealed {}
}
// The sealed set visible in this file: byte slices and fixed-size byte arrays.
impl private::Sealed for [u8] {}
impl<const N: usize> private::Sealed for [u8; N] {}
pub trait ByteSlice: private::Sealed {
/// Returns the underlying bytes.
///
/// Required method; every provided method of this trait reads its data
/// through this accessor. Hidden from the rendered documentation.
#[doc(hidden)]
fn as_bytes(&self) -> &[u8];
/// Returns the underlying bytes mutably.
///
/// Required method; the in-place operations of this trait (for example
/// `make_ascii_lowercase` and `reverse_bytes`) go through this accessor.
/// Hidden from the rendered documentation.
#[doc(hidden)]
fn as_bytes_mut(&mut self) -> &mut [u8];
/// Returns these bytes wrapped as a `&BStr` (constructed with `BStr::new`).
#[inline]
fn as_bstr(&self) -> &BStr {
BStr::new(self.as_bytes())
}
/// Returns these bytes wrapped as a `&mut BStr` (constructed with
/// `BStr::new_mut`).
#[inline]
fn as_bstr_mut(&mut self) -> &mut BStr {
BStr::new_mut(self.as_bytes_mut())
}
/// Borrows the bytes of an OS string without copying.
///
/// On Unix the raw bytes are returned unconditionally, so the result is
/// always `Some`. On every other platform the conversion only succeeds when
/// the OS string is valid UTF-8; otherwise `None` is returned.
#[cfg(feature = "std")]
#[inline]
fn from_os_str(os_str: &OsStr) -> Option<&[u8]> {
    // Unix: `OsStrExt` exposes the underlying bytes directly.
    #[cfg(unix)]
    #[inline]
    fn raw_bytes(os_str: &OsStr) -> Option<&[u8]> {
        use std::os::unix::ffi::OsStrExt;

        Some(OsStrExt::as_bytes(os_str))
    }

    // Elsewhere: only a UTF-8 view of the OS string is available.
    #[cfg(not(unix))]
    #[inline]
    fn raw_bytes(os_str: &OsStr) -> Option<&[u8]> {
        let utf8 = os_str.to_str()?;
        Some(utf8.as_bytes())
    }

    raw_bytes(os_str)
}
/// Borrows the bytes of a path by forwarding `path.as_os_str()` to
/// `from_os_str`; returns `None` exactly when that conversion does.
#[cfg(feature = "std")]
#[inline]
fn from_path(path: &Path) -> Option<&[u8]> {
Self::from_os_str(path.as_os_str())
}
/// Borrows these bytes as a `&str` if they are valid UTF-8.
///
/// # Errors
///
/// Returns the `Utf8Error` produced by `utf8::validate` when validation
/// fails.
#[inline]
fn to_str(&self) -> Result<&str, Utf8Error> {
    let bytes = self.as_bytes();
    utf8::validate(bytes)?;
    // SAFETY: `utf8::validate` just accepted exactly these bytes.
    Ok(unsafe { str::from_utf8_unchecked(bytes) })
}
/// Reinterprets these bytes as a `&str` without any UTF-8 validation.
///
/// # Safety
///
/// The caller must guarantee that the bytes are valid UTF-8. Creating a
/// `&str` that is not valid UTF-8 is undefined behavior. Use `to_str` when
/// that guarantee cannot be made.
#[inline]
unsafe fn to_str_unchecked(&self) -> &str {
    // An explicit block keeps the unsafe operation visible and stays clean
    // under the `unsafe_op_in_unsafe_fn` lint.
    //
    // SAFETY: validity of the bytes is the caller's obligation, as stated in
    // this function's safety contract.
    unsafe { str::from_utf8_unchecked(self.as_bytes()) }
}
/// Converts these bytes to a string, substituting U+FFFD for invalid UTF-8.
///
/// When the bytes are already valid UTF-8 they are borrowed and nothing is
/// allocated. Otherwise an owned `String` is built: the valid prefix is
/// copied, one replacement character is written for the first error, and
/// whatever follows that error is handled by `to_str_lossy_into`.
#[cfg(feature = "alloc")]
#[inline]
fn to_str_lossy(&self) -> Cow<'_, str> {
    let bytes = self.as_bytes();
    let err = match utf8::validate(bytes) {
        Ok(()) => {
            // SAFETY: `utf8::validate` just accepted exactly these bytes.
            return Cow::Borrowed(unsafe { str::from_utf8_unchecked(bytes) });
        }
        Err(err) => err,
    };

    let mut lossy = String::with_capacity(bytes.len());
    let (valid, after) = bytes.split_at(err.valid_up_to());
    // SAFETY: `valid` ends at `valid_up_to()`, the length of the prefix that
    // validation reported as well-formed.
    lossy.push_str(unsafe { str::from_utf8_unchecked(valid) });
    lossy.push_str("\u{FFFD}");
    // No `error_len()` means there is nothing to skip past, so the single
    // replacement character above ends the output.
    if let Some(len) = err.error_len() {
        after[len..].to_str_lossy_into(&mut lossy);
    }
    Cow::Owned(lossy)
}
/// Appends these bytes to `dest`, substituting U+FFFD for invalid UTF-8.
///
/// `dest` is grown up front by the length of the input. Each validation
/// error contributes the valid bytes before it plus one replacement
/// character; scanning resumes after the bytes reported by `error_len()`.
#[cfg(feature = "alloc")]
#[inline]
fn to_str_lossy_into(&self, dest: &mut String) {
    let mut rest = self.as_bytes();
    dest.reserve(rest.len());
    loop {
        let err = match utf8::validate(rest) {
            Ok(()) => {
                // SAFETY: `utf8::validate` just accepted exactly these bytes.
                dest.push_str(unsafe { str::from_utf8_unchecked(rest) });
                return;
            }
            Err(err) => err,
        };

        let (valid, after) = rest.split_at(err.valid_up_to());
        // SAFETY: `valid` ends at `valid_up_to()`, the length of the prefix
        // that validation reported as well-formed.
        dest.push_str(unsafe { str::from_utf8_unchecked(valid) });
        dest.push_str("\u{FFFD}");
        match err.error_len() {
            Some(len) => rest = &after[len..],
            // Nothing to skip past: the replacement character ends the output.
            None => return,
        }
    }
}
/// Borrows these bytes as an OS string.
///
/// # Errors
///
/// On Unix this never fails because the bytes are used as-is. On every other
/// platform the bytes must be valid UTF-8, and the `Utf8Error` from `to_str`
/// is returned when they are not.
#[cfg(feature = "std")]
#[inline]
fn to_os_str(&self) -> Result<&OsStr, Utf8Error> {
    #[cfg(unix)]
    #[inline]
    fn convert(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {
        use std::os::unix::ffi::OsStrExt;

        Ok(<OsStr as OsStrExt>::from_bytes(bytes))
    }

    #[cfg(not(unix))]
    #[inline]
    fn convert(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {
        let text = bytes.to_str()?;
        Ok(OsStr::new(text))
    }

    convert(self.as_bytes())
}
/// Converts these bytes to an OS string, never failing.
///
/// On Unix the bytes are borrowed as-is. On every other platform the bytes
/// go through `to_str_lossy`, so the result is borrowed when they are valid
/// UTF-8 and otherwise owned, with U+FFFD substituted for invalid sequences.
#[cfg(feature = "std")]
#[inline]
fn to_os_str_lossy(&self) -> Cow<'_, OsStr> {
    #[cfg(unix)]
    #[inline]
    fn imp(bytes: &[u8]) -> Cow<'_, OsStr> {
        use std::os::unix::ffi::OsStrExt;

        Cow::Borrowed(OsStr::from_bytes(bytes))
    }

    // The return type spells out `'_`, matching the Unix variant and the
    // rest of this file instead of hiding the borrowed lifetime.
    #[cfg(not(unix))]
    #[inline]
    fn imp(bytes: &[u8]) -> Cow<'_, OsStr> {
        use std::ffi::OsString;

        match bytes.to_str_lossy() {
            Cow::Borrowed(x) => Cow::Borrowed(OsStr::new(x)),
            Cow::Owned(x) => Cow::Owned(OsString::from(x)),
        }
    }

    imp(self.as_bytes())
}
/// Borrows these bytes as a `&Path` by way of `to_os_str`, propagating its
/// error unchanged.
#[cfg(feature = "std")]
#[inline]
fn to_path(&self) -> Result<&Path, Utf8Error> {
self.to_os_str().map(Path::new)
}
/// Converts these bytes to a path, never failing.
///
/// Built on `to_os_str_lossy`: a borrowed OS string becomes a borrowed
/// `Path`, an owned one becomes an owned `PathBuf`.
#[cfg(feature = "std")]
#[inline]
fn to_path_lossy(&self) -> Cow<'_, Path> {
    use std::path::PathBuf;

    match self.to_os_str_lossy() {
        Cow::Owned(os_string) => Cow::Owned(PathBuf::from(os_string)),
        Cow::Borrowed(os_str) => Cow::Borrowed(Path::new(os_str)),
    }
}
/// Returns a new `Vec<u8>` holding these bytes repeated `n` times, as
/// produced by the slice method `repeat`.
#[cfg(feature = "alloc")]
#[inline]
fn repeatn(&self, n: usize) -> Vec<u8> {
self.as_bytes().repeat(n)
}
/// Returns true when `find(needle)` locates `needle` in these bytes.
#[inline]
fn contains_str<B: AsRef<[u8]>>(&self, needle: B) -> bool {
self.find(needle).is_some()
}
/// Returns true when these bytes begin with `prefix` (slice `starts_with`).
#[inline]
fn starts_with_str<B: AsRef<[u8]>>(&self, prefix: B) -> bool {
self.as_bytes().starts_with(prefix.as_ref())
}
/// Returns true when these bytes end with `suffix` (slice `ends_with`).
#[inline]
fn ends_with_str<B: AsRef<[u8]>>(&self, suffix: B) -> bool {
self.as_bytes().ends_with(suffix.as_ref())
}
/// Searches these bytes for `needle` with a freshly built `Finder` and
/// returns the match offset it reports, or `None` when nothing matches.
///
/// The searcher is constructed on every call; build a `Finder` once when the
/// same needle is searched for repeatedly.
#[inline]
fn find<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {
Finder::new(needle.as_ref()).find(self.as_bytes())
}
/// Searches these bytes for `needle` from the end with a freshly built
/// `FinderReverse` and returns the match offset it reports, or `None` when
/// nothing matches.
#[inline]
fn rfind<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {
FinderReverse::new(needle.as_ref()).rfind(self.as_bytes())
}
/// Returns a `Find` iterator over the match offsets of `needle` in these
/// bytes. The iterator borrows both the haystack (`'h`) and the needle (`'n`).
#[inline]
fn find_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>(
&'h self,
needle: &'n B,
) -> Find<'h, 'n> {
Find::new(self.as_bytes(), needle.as_ref())
}
/// Returns a `FindReverse` iterator over the match offsets of `needle` in
/// these bytes, searching from the end. The iterator borrows both the
/// haystack (`'h`) and the needle (`'n`).
#[inline]
fn rfind_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>(
&'h self,
needle: &'n B,
) -> FindReverse<'h, 'n> {
FindReverse::new(self.as_bytes(), needle.as_ref())
}
/// Returns the offset of `byte` in these bytes as reported by `memchr`
/// (forward search), or `None` when the byte does not occur.
#[inline]
fn find_byte(&self, byte: u8) -> Option<usize> {
memchr(byte, self.as_bytes())
}
/// Returns the offset of `byte` in these bytes as reported by `memrchr`
/// (reverse search), or `None` when the byte does not occur.
#[inline]
fn rfind_byte(&self, byte: u8) -> Option<usize> {
memrchr(byte, self.as_bytes())
}
/// Searches for the UTF-8 encoding of `ch` with `find`.
///
/// The character is encoded into a 4-byte scratch buffer, the maximum length
/// of a UTF-8 encoded `char`.
#[inline]
fn find_char(&self, ch: char) -> Option<usize> {
self.find(ch.encode_utf8(&mut [0; 4]))
}
/// Searches for the UTF-8 encoding of `ch` from the end with `rfind`.
///
/// The character is encoded into a 4-byte scratch buffer, the maximum length
/// of a UTF-8 encoded `char`.
#[inline]
fn rfind_char(&self, ch: char) -> Option<usize> {
self.rfind(ch.encode_utf8(&mut [0; 4]))
}
/// Forwards to `byteset::find` with these bytes and the given set.
///
/// NOTE(review): presumably the offset of the first byte that is a member of
/// `byteset`; confirm against the `byteset` module.
#[inline]
fn find_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
byteset::find(self.as_bytes(), byteset.as_ref())
}
/// Forwards to `byteset::find_not` with these bytes and the given set.
///
/// NOTE(review): presumably the offset of the first byte that is NOT a
/// member of `byteset`; confirm against the `byteset` module.
#[inline]
fn find_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
byteset::find_not(self.as_bytes(), byteset.as_ref())
}
/// Forwards to `byteset::rfind` with these bytes and the given set.
///
/// NOTE(review): presumably the offset of the last byte that is a member of
/// `byteset`; confirm against the `byteset` module.
#[inline]
fn rfind_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
byteset::rfind(self.as_bytes(), byteset.as_ref())
}
/// Forwards to `byteset::rfind_not` with these bytes and the given set.
///
/// NOTE(review): presumably the offset of the last byte that is NOT a member
/// of `byteset`; confirm against the `byteset` module.
#[inline]
fn rfind_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
byteset::rfind_not(self.as_bytes(), byteset.as_ref())
}
/// Returns a `Fields` iterator over these bytes; `Fields` splits on
/// characters for which `char::is_whitespace` is true.
#[cfg(feature = "unicode")]
#[inline]
fn fields(&self) -> Fields<'_> {
Fields::new(self.as_bytes())
}
/// Returns a `FieldsWith` iterator over the runs of characters for which
/// the predicate `f` returns false; characters for which it returns true act
/// as separators.
#[inline]
fn fields_with<F: FnMut(char) -> bool>(&self, f: F) -> FieldsWith<'_, F> {
FieldsWith::new(self.as_bytes(), f)
}
/// Returns a `Split` iterator over the pieces of these bytes that lie
/// between matches of `splitter`. The iterator borrows both the haystack
/// (`'h`) and the splitter (`'s`).
#[inline]
fn split_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
&'h self,
splitter: &'s B,
) -> Split<'h, 's> {
Split::new(self.as_bytes(), splitter.as_ref())
}
/// Returns a `SplitReverse` iterator over the pieces of these bytes that lie
/// between matches of `splitter`, produced from the end towards the start.
/// The iterator borrows both the haystack (`'h`) and the splitter (`'s`).
#[inline]
fn rsplit_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
&'h self,
splitter: &'s B,
) -> SplitReverse<'h, 's> {
SplitReverse::new(self.as_bytes(), splitter.as_ref())
}
/// Splits these bytes around the match of `splitter` found by a forward
/// search.
///
/// Returns the bytes before the match and the bytes after it (the splitter
/// itself is dropped), or `None` when `splitter` does not occur.
#[inline]
fn split_once_str<'a, B: ?Sized + AsRef<[u8]>>(
    &'a self,
    splitter: &B,
) -> Option<(&'a [u8], &'a [u8])> {
    let haystack = self.as_bytes();
    let needle = splitter.as_ref();
    Finder::new(needle).find(haystack).map(|at| {
        let (before, after) = (&haystack[..at], &haystack[at + needle.len()..]);
        (before, after)
    })
}
/// Splits these bytes around the match of `splitter` found by a reverse
/// search.
///
/// Returns the bytes before the match and the bytes after it (the splitter
/// itself is dropped), or `None` when `splitter` does not occur.
#[inline]
fn rsplit_once_str<'a, B: ?Sized + AsRef<[u8]>>(
    &'a self,
    splitter: &B,
) -> Option<(&'a [u8], &'a [u8])> {
    let haystack = self.as_bytes();
    let needle = splitter.as_ref();
    FinderReverse::new(needle).rfind(haystack).map(|at| {
        let (before, after) = (&haystack[..at], &haystack[at + needle.len()..]);
        (before, after)
    })
}
/// Returns a `SplitN` iterator: like `split_str`, but yielding at most
/// `limit` pieces, with the final piece being the unsplit remainder.
#[inline]
fn splitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
&'h self,
limit: usize,
splitter: &'s B,
) -> SplitN<'h, 's> {
SplitN::new(self.as_bytes(), splitter.as_ref(), limit)
}
/// Returns a `SplitNReverse` iterator: like `rsplit_str`, but yielding at
/// most `limit` pieces, with the final piece being the unsplit remainder at
/// the front of the haystack.
#[inline]
fn rsplitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
&'h self,
limit: usize,
splitter: &'s B,
) -> SplitNReverse<'h, 's> {
SplitNReverse::new(self.as_bytes(), splitter.as_ref(), limit)
}
/// Returns a new `Vec<u8>` in which every match of `needle` found by
/// `find_iter` is replaced with `replacement`.
///
/// The output buffer starts with the capacity of the input length; the
/// actual work is done by `replace_into`.
#[cfg(feature = "alloc")]
#[inline]
fn replace<N: AsRef<[u8]>, R: AsRef<[u8]>>(
&self,
needle: N,
replacement: R,
) -> Vec<u8> {
let mut dest = Vec::with_capacity(self.as_bytes().len());
self.replace_into(needle, replacement, &mut dest);
dest
}
/// Returns a new `Vec<u8>` in which the first `limit` matches of `needle`
/// are replaced with `replacement`.
///
/// The output buffer starts with the capacity of the input length; the
/// actual work is done by `replacen_into`.
#[cfg(feature = "alloc")]
#[inline]
fn replacen<N: AsRef<[u8]>, R: AsRef<[u8]>>(
&self,
needle: N,
replacement: R,
limit: usize,
) -> Vec<u8> {
let mut dest = Vec::with_capacity(self.as_bytes().len());
self.replacen_into(needle, replacement, limit, &mut dest);
dest
}
/// Appends these bytes to `dest`, writing `replacement` in place of every
/// match of `needle` yielded by `find_iter`.
///
/// `dest` is only appended to; its existing contents are left untouched.
#[cfg(feature = "alloc")]
#[inline]
fn replace_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
    &self,
    needle: N,
    replacement: R,
    dest: &mut Vec<u8>,
) {
    let haystack = self.as_bytes();
    let needle = needle.as_ref();
    let replacement = replacement.as_ref();

    // Offset of the first haystack byte that has not been copied yet.
    let mut pending = 0;
    for hit in self.find_iter(needle) {
        dest.push_str(&haystack[pending..hit]);
        dest.push_str(replacement);
        pending = hit + needle.len();
    }
    // Whatever follows the last match is copied verbatim.
    dest.push_str(&haystack[pending..]);
}
/// Appends these bytes to `dest`, writing `replacement` in place of the
/// first `limit` matches of `needle` yielded by `find_iter`.
///
/// `dest` is only appended to; its existing contents are left untouched.
/// With `limit == 0` the input is copied unchanged.
#[cfg(feature = "alloc")]
#[inline]
fn replacen_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
    &self,
    needle: N,
    replacement: R,
    limit: usize,
    dest: &mut Vec<u8>,
) {
    let haystack = self.as_bytes();
    let needle = needle.as_ref();
    let replacement = replacement.as_ref();

    // Offset of the first haystack byte that has not been copied yet.
    let mut pending = 0;
    for hit in self.find_iter(needle).take(limit) {
        dest.push_str(&haystack[pending..hit]);
        dest.push_str(replacement);
        pending = hit + needle.len();
    }
    // Whatever follows the last replaced match is copied verbatim.
    dest.push_str(&haystack[pending..]);
}
/// Returns a `Bytes` iterator that yields each byte of this slice by value.
#[inline]
fn bytes(&self) -> Bytes<'_> {
Bytes { it: self.as_bytes().iter() }
}
/// Returns a `Chars` iterator (from the `utf8` module) over these bytes.
///
/// NOTE(review): presumably yields decoded `char`s, substituting U+FFFD for
/// invalid UTF-8; confirm against `utf8::Chars`.
#[inline]
fn chars(&self) -> Chars<'_> {
Chars::new(self.as_bytes())
}
/// Returns a `CharIndices` iterator (from the `utf8` module) over these
/// bytes. Its items are destructured throughout this file as
/// `(start, end, char)`, where `start..end` is used as a byte range.
#[inline]
fn char_indices(&self) -> CharIndices<'_> {
CharIndices::new(self.as_bytes())
}
/// Returns a `Utf8Chunks` iterator (from the `utf8` module) positioned at
/// the start of these bytes.
#[inline]
fn utf8_chunks(&self) -> Utf8Chunks<'_> {
Utf8Chunks { bytes: self.as_bytes() }
}
/// Returns a `Graphemes` iterator (from the `unicode` module) over these
/// bytes. Requires the `unicode` feature.
#[cfg(feature = "unicode")]
#[inline]
fn graphemes(&self) -> Graphemes<'_> {
Graphemes::new(self.as_bytes())
}
/// Returns a `GraphemeIndices` iterator (from the `unicode` module) over
/// these bytes. Requires the `unicode` feature.
#[cfg(feature = "unicode")]
#[inline]
fn grapheme_indices(&self) -> GraphemeIndices<'_> {
GraphemeIndices::new(self.as_bytes())
}
/// Returns a `Words` iterator (from the `unicode` module) over these bytes.
/// Requires the `unicode` feature.
#[cfg(feature = "unicode")]
#[inline]
fn words(&self) -> Words<'_> {
Words::new(self.as_bytes())
}
/// Returns a `WordIndices` iterator (from the `unicode` module) over these
/// bytes. Requires the `unicode` feature.
#[cfg(feature = "unicode")]
#[inline]
fn word_indices(&self) -> WordIndices<'_> {
WordIndices::new(self.as_bytes())
}
/// Returns a `WordsWithBreaks` iterator (from the `unicode` module) over
/// these bytes. Requires the `unicode` feature.
#[cfg(feature = "unicode")]
#[inline]
fn words_with_breaks(&self) -> WordsWithBreaks<'_> {
WordsWithBreaks::new(self.as_bytes())
}
/// Returns a `WordsWithBreakIndices` iterator (from the `unicode` module)
/// over these bytes. Requires the `unicode` feature.
#[cfg(feature = "unicode")]
#[inline]
fn words_with_break_indices(&self) -> WordsWithBreakIndices<'_> {
WordsWithBreakIndices::new(self.as_bytes())
}
/// Returns a `Sentences` iterator (from the `unicode` module) over these
/// bytes. Requires the `unicode` feature.
#[cfg(feature = "unicode")]
#[inline]
fn sentences(&self) -> Sentences<'_> {
Sentences::new(self.as_bytes())
}
/// Returns a `SentenceIndices` iterator (from the `unicode` module) over
/// these bytes. Requires the `unicode` feature.
#[cfg(feature = "unicode")]
#[inline]
fn sentence_indices(&self) -> SentenceIndices<'_> {
SentenceIndices::new(self.as_bytes())
}
/// Returns a `Lines` iterator over these bytes: each line is yielded with
/// its trailing `\n` or `\r\n` removed.
#[inline]
fn lines(&self) -> Lines<'_> {
Lines::new(self.as_bytes())
}
/// Returns a `LinesWithTerminator` iterator over these bytes: each line is
/// yielded including its terminating `\n`, when it has one.
#[inline]
fn lines_with_terminator(&self) -> LinesWithTerminator<'_> {
LinesWithTerminator::new(self.as_bytes())
}
/// Returns these bytes with whitespace removed from both ends, by applying
/// `trim_start` and then `trim_end`. Requires the `unicode` feature.
#[cfg(feature = "unicode")]
#[inline]
fn trim(&self) -> &[u8] {
self.trim_start().trim_end()
}
/// Returns these bytes with leading whitespace removed.
///
/// The cut point comes from `whitespace_len_fwd` (presumably the byte length
/// of the leading whitespace run; see the `unicode` module). Requires the
/// `unicode` feature.
#[cfg(feature = "unicode")]
#[inline]
fn trim_start(&self) -> &[u8] {
    let bytes = self.as_bytes();
    let skipped = whitespace_len_fwd(bytes);
    &bytes[skipped..]
}
/// Returns these bytes with trailing whitespace removed.
///
/// The cut point comes from `whitespace_len_rev` (presumably the offset at
/// which the trailing whitespace run begins; see the `unicode` module).
/// Requires the `unicode` feature.
#[cfg(feature = "unicode")]
#[inline]
fn trim_end(&self) -> &[u8] {
    let bytes = self.as_bytes();
    let kept = whitespace_len_rev(bytes);
    &bytes[..kept]
}
/// Returns these bytes with every leading and trailing character for which
/// `trim` returns true removed.
///
/// The predicate is lent by mutable reference to `trim_start_with` and then
/// to `trim_end_with`, so both passes share the same closure state.
#[inline]
fn trim_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
self.trim_start_with(&mut trim).trim_end_with(&mut trim)
}
/// Returns these bytes with every leading character for which `trim`
/// returns true removed.
///
/// Characters are visited front to back via `char_indices`, stopping at the
/// first one the predicate rejects. When every character is trimmed (or the
/// input is empty) the result is an empty slice.
#[inline]
fn trim_start_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
    let first_kept = self.char_indices().find(|&(_, _, ch)| !trim(ch));
    match first_kept {
        Some((start, _, _)) => &self.as_bytes()[start..],
        None => b"",
    }
}
/// Returns these bytes with every trailing character for which `trim`
/// returns true removed.
///
/// Characters are visited back to front via `char_indices().rev()`, stopping
/// at the first one the predicate rejects. When every character is trimmed
/// (or the input is empty) the result is an empty slice.
#[inline]
fn trim_end_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
    let last_kept = self.char_indices().rev().find(|&(_, _, ch)| !trim(ch));
    match last_kept {
        Some((_, end, _)) => &self.as_bytes()[..end],
        None => b"",
    }
}
/// Returns a new `Vec<u8>` holding the lowercase form of these bytes, as
/// produced by `to_lowercase_into`. Requires the `alloc` and `unicode`
/// features.
#[cfg(all(feature = "alloc", feature = "unicode"))]
#[inline]
fn to_lowercase(&self) -> Vec<u8> {
let mut buf = vec![];
self.to_lowercase_into(&mut buf);
buf
}
/// Appends the lowercase form of these bytes to `buf`.
///
/// Each character from `char_indices` is handled in one of three ways:
///
/// * U+FFFD: the original bytes of that span are copied unchanged
///   (presumably so that invalid UTF-8, which decodes to U+FFFD, survives
///   byte-for-byte),
/// * ASCII: lowercased with `to_ascii_lowercase`,
/// * anything else: expanded with `char::to_lowercase`, which may produce
///   more than one character.
///
/// `buf` is grown up front by the input length and is only appended to.
#[cfg(all(feature = "alloc", feature = "unicode"))]
#[inline]
fn to_lowercase_into(&self, buf: &mut Vec<u8>) {
    let bytes = self.as_bytes();
    buf.reserve(bytes.len());
    for (start, end, ch) in self.char_indices() {
        match ch {
            '\u{FFFD}' => buf.push_str(&bytes[start..end]),
            ch if ch.is_ascii() => buf.push_char(ch.to_ascii_lowercase()),
            ch => {
                for lower in ch.to_lowercase() {
                    buf.push_char(lower);
                }
            }
        }
    }
}
/// Returns a copy of these bytes with ASCII letters lowercased, as produced
/// by the slice method `to_ascii_lowercase`.
#[cfg(feature = "alloc")]
#[inline]
fn to_ascii_lowercase(&self) -> Vec<u8> {
self.as_bytes().to_ascii_lowercase()
}
/// Lowercases the ASCII letters of these bytes in place, using the slice
/// method `make_ascii_lowercase`.
#[inline]
fn make_ascii_lowercase(&mut self) {
self.as_bytes_mut().make_ascii_lowercase();
}
/// Returns a new `Vec<u8>` holding the uppercase form of these bytes, as
/// produced by `to_uppercase_into`. Requires the `alloc` and `unicode`
/// features.
#[cfg(all(feature = "alloc", feature = "unicode"))]
#[inline]
fn to_uppercase(&self) -> Vec<u8> {
let mut buf = vec![];
self.to_uppercase_into(&mut buf);
buf
}
/// Appends the uppercase form of these bytes to `buf`.
///
/// Each character from `char_indices` is handled in one of three ways:
///
/// * U+FFFD: the original bytes of that span are copied unchanged
///   (presumably so that invalid UTF-8, which decodes to U+FFFD, survives
///   byte-for-byte),
/// * ASCII: uppercased with `to_ascii_uppercase`,
/// * anything else: expanded with `char::to_uppercase`, which may produce
///   more than one character.
///
/// `buf` is grown up front by the input length and is only appended to.
#[cfg(all(feature = "alloc", feature = "unicode"))]
#[inline]
fn to_uppercase_into(&self, buf: &mut Vec<u8>) {
    let bytes = self.as_bytes();
    buf.reserve(bytes.len());
    for (start, end, ch) in self.char_indices() {
        match ch {
            '\u{FFFD}' => buf.push_str(&bytes[start..end]),
            ch if ch.is_ascii() => buf.push_char(ch.to_ascii_uppercase()),
            ch => {
                for upper in ch.to_uppercase() {
                    buf.push_char(upper);
                }
            }
        }
    }
}
/// Returns a copy of these bytes with ASCII letters uppercased, as produced
/// by the slice method `to_ascii_uppercase`.
#[cfg(feature = "alloc")]
#[inline]
fn to_ascii_uppercase(&self) -> Vec<u8> {
self.as_bytes().to_ascii_uppercase()
}
/// Uppercases the ASCII letters of these bytes in place, using the slice
/// method `make_ascii_uppercase`.
#[inline]
fn make_ascii_uppercase(&mut self) {
self.as_bytes_mut().make_ascii_uppercase();
}
/// Returns an `EscapeBytes` value (from the `escape_bytes` module) over
/// these bytes.
///
/// NOTE(review): presumably an iterator producing an escaped, printable
/// rendering of the bytes; confirm against `EscapeBytes`.
#[inline]
fn escape_bytes(&self) -> EscapeBytes<'_> {
EscapeBytes::new(self.as_bytes())
}
/// Reverses the order of these bytes in place (slice `reverse`).
#[inline]
fn reverse_bytes(&mut self) {
self.as_bytes_mut().reverse();
}
/// Reverses these bytes in place character by character.
///
/// Works in two passes: first the bytes of every multi-byte unit reported
/// by `utf8::decode` are reversed on their own, then the whole slice is
/// reversed. The second pass restores the byte order inside each unit while
/// leaving the units themselves in reverse order.
#[inline]
fn reverse_chars(&mut self) {
    let mut pos = 0;
    loop {
        let (_, width) = utf8::decode(&self.as_bytes()[pos..]);
        match width {
            // A zero width is the signal that the end has been reached.
            0 => break,
            // Reversing a single byte would be a no-op.
            1 => {}
            _ => self.as_bytes_mut()[pos..pos + width].reverse_bytes(),
        }
        pos += width;
    }
    self.reverse_bytes();
}
/// Reverses these bytes in place grapheme by grapheme.
///
/// Works in two passes: first the bytes of every multi-byte unit reported
/// by `decode_grapheme` are reversed on their own, then the whole slice is
/// reversed. The second pass restores the byte order inside each unit while
/// leaving the units themselves in reverse order. Requires the `unicode`
/// feature.
#[cfg(feature = "unicode")]
#[inline]
fn reverse_graphemes(&mut self) {
    use crate::unicode::decode_grapheme;

    let mut pos = 0;
    loop {
        let (_, width) = decode_grapheme(&self.as_bytes()[pos..]);
        match width {
            // A zero width is the signal that the end has been reached.
            0 => break,
            // Reversing a single byte would be a no-op.
            1 => {}
            _ => self.as_bytes_mut()[pos..pos + width].reverse_bytes(),
        }
        pos += width;
    }
    self.reverse_bytes();
}
/// Returns true when `ascii::first_non_ascii_byte` returns the slice length,
/// which is how that helper signals that no non-ASCII byte was found (see
/// `find_non_ascii_byte`). An empty slice is therefore ASCII.
#[inline]
fn is_ascii(&self) -> bool {
ascii::first_non_ascii_byte(self.as_bytes()) == self.as_bytes().len()
}
/// Returns true when `utf8::validate` accepts these bytes.
#[inline]
fn is_utf8(&self) -> bool {
utf8::validate(self.as_bytes()).is_ok()
}
/// Returns the final byte of this slice by value, or `None` when the slice
/// is empty.
#[inline]
fn last_byte(&self) -> Option<u8> {
    match self.as_bytes() {
        [.., last] => Some(*last),
        [] => None,
    }
}
/// Returns the offset reported by `ascii::first_non_ascii_byte`, or `None`
/// when that helper returns the slice length (its "nothing found" signal).
#[inline]
fn find_non_ascii_byte(&self) -> Option<usize> {
    let bytes = self.as_bytes();
    match ascii::first_non_ascii_byte(bytes) {
        index if index == bytes.len() => None,
        index => Some(index),
    }
}
}
/// A reusable forward substring searcher for one fixed needle; a thin
/// wrapper around `memmem::Finder`.
#[derive(Clone, Debug)]
pub struct Finder<'a>(memmem::Finder<'a>);
impl<'a> Finder<'a> {
/// Builds a searcher for `needle`, which stays borrowed for `'a`.
#[inline]
pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> Finder<'a> {
Finder(memmem::Finder::new(needle.as_ref()))
}
/// Converts this searcher into one with the `'static` lifetime by
/// delegating to `memmem::Finder::into_owned`. Requires the `alloc` feature.
#[cfg(feature = "alloc")]
#[inline]
pub fn into_owned(self) -> Finder<'static> {
Finder(self.0.into_owned())
}
/// Returns the needle this searcher was built for.
#[inline]
pub fn needle(&self) -> &[u8] {
self.0.needle()
}
/// Searches `haystack` and returns the match offset reported by the wrapped
/// `memmem::Finder`, or `None` when there is no match.
#[inline]
pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
self.0.find(haystack.as_ref())
}
}
/// A reusable reverse substring searcher for one fixed needle; a thin
/// wrapper around `memmem::FinderRev`.
#[derive(Clone, Debug)]
pub struct FinderReverse<'a>(memmem::FinderRev<'a>);
impl<'a> FinderReverse<'a> {
/// Builds a reverse searcher for `needle`, which stays borrowed for `'a`.
#[inline]
pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> FinderReverse<'a> {
FinderReverse(memmem::FinderRev::new(needle.as_ref()))
}
/// Converts this searcher into one with the `'static` lifetime by
/// delegating to `memmem::FinderRev::into_owned`. Requires the `alloc`
/// feature.
#[cfg(feature = "alloc")]
#[inline]
pub fn into_owned(self) -> FinderReverse<'static> {
FinderReverse(self.0.into_owned())
}
/// Returns the needle this searcher was built for.
#[inline]
pub fn needle(&self) -> &[u8] {
self.0.needle()
}
/// Searches `haystack` from the end and returns the match offset reported
/// by the wrapped `memmem::FinderRev`, or `None` when there is no match.
#[inline]
pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
self.0.rfind(haystack.as_ref())
}
}
/// Iterator over the match offsets of a needle in a haystack, searching
/// forwards. Created by `ByteSlice::find_iter`.
#[derive(Clone, Debug)]
pub struct Find<'h, 'n> {
// The underlying `memmem` iterator that performs the search.
it: memmem::FindIter<'h, 'n>,
// Copies of the inputs, kept because `Split` reads them directly to slice
// out the pieces between matches.
haystack: &'h [u8],
needle: &'n [u8],
}
impl<'h, 'n> Find<'h, 'n> {
/// Starts a forward search for `needle` in `haystack`.
fn new(haystack: &'h [u8], needle: &'n [u8]) -> Find<'h, 'n> {
Find { it: memmem::find_iter(haystack, needle), haystack, needle }
}
}
impl<'h, 'n> Iterator for Find<'h, 'n> {
type Item = usize;
/// Returns the next match offset produced by the underlying `memmem`
/// iterator.
#[inline]
fn next(&mut self) -> Option<usize> {
self.it.next()
}
}
/// Iterator over the match offsets of a needle in a haystack, searching
/// backwards. Created by `ByteSlice::rfind_iter`.
#[derive(Clone, Debug)]
pub struct FindReverse<'h, 'n> {
// The underlying `memmem` iterator that performs the search.
it: memmem::FindRevIter<'h, 'n>,
// Copies of the inputs, exposed to `SplitReverse` through the accessors
// below.
haystack: &'h [u8],
needle: &'n [u8],
}
impl<'h, 'n> FindReverse<'h, 'n> {
/// Starts a reverse search for `needle` in `haystack`.
fn new(haystack: &'h [u8], needle: &'n [u8]) -> FindReverse<'h, 'n> {
FindReverse {
it: memmem::rfind_iter(haystack, needle),
haystack,
needle,
}
}
/// Returns the haystack being searched, with its full `'h` lifetime.
fn haystack(&self) -> &'h [u8] {
self.haystack
}
/// Returns the needle being searched for, with its full `'n` lifetime.
fn needle(&self) -> &'n [u8] {
self.needle
}
}
impl<'h, 'n> Iterator for FindReverse<'h, 'n> {
type Item = usize;
/// Returns the next match offset produced by the underlying `memmem`
/// iterator.
#[inline]
fn next(&mut self) -> Option<usize> {
self.it.next()
}
}
/// An iterator over the individual bytes of a byte slice, yielded by value.
///
/// A thin wrapper around `slice::Iter<u8>`; created by `ByteSlice::bytes`.
#[derive(Clone, Debug)]
pub struct Bytes<'a> {
    it: slice::Iter<'a, u8>,
}

impl<'a> Bytes<'a> {
    /// Returns the bytes that have not been yielded yet, from either end.
    ///
    /// The returned slice borrows from the original data (`'a`), not from
    /// the iterator.
    #[inline]
    pub fn as_bytes(&self) -> &'a [u8] {
        let remaining: &'a [u8] = self.it.as_slice();
        remaining
    }
}

impl<'a> Iterator for Bytes<'a> {
    type Item = u8;

    /// Yields the next byte from the front.
    #[inline]
    fn next(&mut self) -> Option<u8> {
        let byte = self.it.next()?;
        Some(*byte)
    }

    /// Forwards the size hint of the wrapped slice iterator.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        Iterator::size_hint(&self.it)
    }
}

impl<'a> DoubleEndedIterator for Bytes<'a> {
    /// Yields the next byte from the back.
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        let byte = self.it.next_back()?;
        Some(*byte)
    }
}

impl<'a> ExactSizeIterator for Bytes<'a> {
    /// Returns the number of bytes still to be yielded.
    #[inline]
    fn len(&self) -> usize {
        ExactSizeIterator::len(&self.it)
    }
}

impl<'a> iter::FusedIterator for Bytes<'a> {}
/// Iterator over the whitespace-separated fields of a byte slice. Created
/// by `ByteSlice::fields`; requires the `unicode` feature.
#[cfg(feature = "unicode")]
#[derive(Clone, Debug)]
pub struct Fields<'a> {
// A `FieldsWith` specialised to a plain function pointer predicate.
it: FieldsWith<'a, fn(char) -> bool>,
}
#[cfg(feature = "unicode")]
impl<'a> Fields<'a> {
/// Builds the iterator with `char::is_whitespace` as the separator
/// predicate.
fn new(bytes: &'a [u8]) -> Fields<'a> {
Fields { it: bytes.fields_with(char::is_whitespace) }
}
}
#[cfg(feature = "unicode")]
impl<'a> Iterator for Fields<'a> {
type Item = &'a [u8];
/// Returns the next field from the wrapped `FieldsWith`.
#[inline]
fn next(&mut self) -> Option<&'a [u8]> {
self.it.next()
}
}
/// Iterator over the fields of a byte slice, where a field is a maximal run
/// of characters for which the predicate returns false. Created by
/// `ByteSlice::fields_with`.
#[derive(Clone, Debug)]
pub struct FieldsWith<'a, F> {
    /// Separator predicate: returns true for characters that split fields.
    f: F,
    /// The full input; fields are sliced out of it.
    bytes: &'a [u8],
    /// Cursor over the characters of `bytes` and their byte ranges.
    chars: CharIndices<'a>,
}

impl<'a, F: FnMut(char) -> bool> FieldsWith<'a, F> {
    /// Starts iterating over `bytes` with `f` as the separator predicate.
    fn new(bytes: &'a [u8], f: F) -> FieldsWith<'a, F> {
        let chars = bytes.char_indices();
        FieldsWith { f, bytes, chars }
    }
}

impl<'a, F: FnMut(char) -> bool> Iterator for FieldsWith<'a, F> {
    type Item = &'a [u8];

    /// Returns the next field, or `None` once only separators (or nothing)
    /// remain.
    #[inline]
    fn next(&mut self) -> Option<&'a [u8]> {
        // Skip separators until the first character of the next field.
        let (start, mut end) = loop {
            let (s, e, ch) = self.chars.next()?;
            if !(self.f)(ch) {
                break (s, e);
            }
        };
        // Extend the field until a separator, which is consumed, or the end.
        while let Some((_, e, ch)) = self.chars.next() {
            if (self.f)(ch) {
                break;
            }
            end = e;
        }
        Some(&self.bytes[start..end])
    }
}
/// Iterator over the pieces of a haystack that lie between matches of a
/// splitter, front to back. Created by `ByteSlice::split_str`.
#[derive(Clone, Debug)]
pub struct Split<'h, 's> {
    /// Source of splitter match offsets; also holds haystack and splitter.
    finder: Find<'h, 's>,
    /// Offset just past the most recent match, i.e. where the next piece
    /// starts.
    last: usize,
    /// Set once the final piece has been yielded.
    done: bool,
}

impl<'h, 's> Split<'h, 's> {
    /// Starts splitting `haystack` on `splitter`.
    fn new(haystack: &'h [u8], splitter: &'s [u8]) -> Split<'h, 's> {
        Split { finder: haystack.find_iter(splitter), last: 0, done: false }
    }
}

impl<'h, 's> Iterator for Split<'h, 's> {
    type Item = &'h [u8];

    /// Returns the next piece. After the last match, the remaining tail is
    /// yielded once (an empty slice when the haystack ends with a match or
    /// is empty), and `None` from then on.
    #[inline]
    fn next(&mut self) -> Option<&'h [u8]> {
        let haystack = self.finder.haystack;

        if let Some(start) = self.finder.next() {
            let piece = &haystack[self.last..start];
            self.last = start + self.finder.needle.len();
            return Some(piece);
        }

        // No more matches: hand out the non-empty tail, if there is one.
        if self.last < haystack.len() {
            let tail = &haystack[self.last..];
            self.last = haystack.len();
            self.done = true;
            return Some(tail);
        }

        // Nothing is left after the last match; emit one final empty piece.
        if self.done {
            return None;
        }
        self.done = true;
        Some(b"")
    }
}
/// Iterator over the pieces of a haystack that lie between matches of a
/// splitter, back to front. Created by `ByteSlice::rsplit_str`.
#[derive(Clone, Debug)]
pub struct SplitReverse<'h, 's> {
    /// Source of splitter match offsets; also holds haystack and splitter.
    finder: FindReverse<'h, 's>,
    /// Start offset of the most recent match, i.e. where the next piece
    /// ends.
    last: usize,
    /// Set once the final piece has been yielded.
    done: bool,
}

impl<'h, 's> SplitReverse<'h, 's> {
    /// Starts splitting `haystack` on `splitter` from the end.
    fn new(haystack: &'h [u8], splitter: &'s [u8]) -> SplitReverse<'h, 's> {
        SplitReverse {
            finder: haystack.rfind_iter(splitter),
            last: haystack.len(),
            done: false,
        }
    }
}

impl<'h, 's> Iterator for SplitReverse<'h, 's> {
    type Item = &'h [u8];

    /// Returns the next piece. After the last match, the remaining head is
    /// yielded once (an empty slice when the haystack starts with a match or
    /// is empty), and `None` from then on.
    #[inline]
    fn next(&mut self) -> Option<&'h [u8]> {
        let haystack = self.finder.haystack();

        if let Some(start) = self.finder.next() {
            let piece_start = start + self.finder.needle().len();
            let piece = &haystack[piece_start..self.last];
            self.last = start;
            return Some(piece);
        }

        // No more matches: hand out the non-empty head, if there is one.
        if self.last != 0 {
            let head = &haystack[..self.last];
            self.last = 0;
            self.done = true;
            return Some(head);
        }

        // Nothing is left before the last match; emit one final empty piece.
        if self.done {
            return None;
        }
        self.done = true;
        Some(b"")
    }
}
/// Iterator over at most `limit` pieces of a haystack split on a splitter,
/// front to back; the last permitted piece is the unsplit remainder.
/// Created by `ByteSlice::splitn_str`.
#[derive(Clone, Debug)]
pub struct SplitN<'h, 's> {
    /// The unlimited splitter that produces all pieces but the last.
    split: Split<'h, 's>,
    /// Maximum number of pieces to yield.
    limit: usize,
    /// Number of `next` calls so far (saturating).
    count: usize,
}

impl<'h, 's> SplitN<'h, 's> {
    /// Starts splitting `haystack` on `splitter`, yielding at most `limit`
    /// pieces.
    fn new(
        haystack: &'h [u8],
        splitter: &'s [u8],
        limit: usize,
    ) -> SplitN<'h, 's> {
        let split = haystack.split_str(splitter);
        SplitN { split, limit, count: 0 }
    }
}

impl<'h, 's> Iterator for SplitN<'h, 's> {
    type Item = &'h [u8];

    /// Returns the next piece, the unsplit remainder on the `limit`-th call,
    /// and `None` afterwards or once the underlying split is finished.
    #[inline]
    fn next(&mut self) -> Option<&'h [u8]> {
        // The counter advances on every call, including calls made after
        // exhaustion. Saturating keeps long-running polling from overflowing
        // it, which would panic with overflow checks enabled or wrap to zero
        // and let a finished iterator yield again.
        self.count = self.count.saturating_add(1);
        if self.count > self.limit || self.split.done {
            None
        } else if self.count == self.limit {
            // Last permitted piece: everything not yet consumed.
            Some(&self.split.finder.haystack[self.split.last..])
        } else {
            self.split.next()
        }
    }
}
/// Iterator over at most `limit` pieces of a haystack split on a splitter,
/// back to front; the last permitted piece is the unsplit remainder at the
/// front of the haystack. Created by `ByteSlice::rsplitn_str`.
#[derive(Clone, Debug)]
pub struct SplitNReverse<'h, 's> {
    /// The unlimited reverse splitter that produces all pieces but the last.
    split: SplitReverse<'h, 's>,
    /// Maximum number of pieces to yield.
    limit: usize,
    /// Number of `next` calls so far (saturating).
    count: usize,
}

impl<'h, 's> SplitNReverse<'h, 's> {
    /// Starts splitting `haystack` on `splitter` from the end, yielding at
    /// most `limit` pieces.
    fn new(
        haystack: &'h [u8],
        splitter: &'s [u8],
        limit: usize,
    ) -> SplitNReverse<'h, 's> {
        let split = haystack.rsplit_str(splitter);
        SplitNReverse { split, limit, count: 0 }
    }
}

impl<'h, 's> Iterator for SplitNReverse<'h, 's> {
    type Item = &'h [u8];

    /// Returns the next piece, the unsplit remainder on the `limit`-th call,
    /// and `None` afterwards or once the underlying split is finished.
    #[inline]
    fn next(&mut self) -> Option<&'h [u8]> {
        // The counter advances on every call, including calls made after
        // exhaustion. Saturating keeps long-running polling from overflowing
        // it, which would panic with overflow checks enabled or wrap to zero
        // and let a finished iterator yield again.
        self.count = self.count.saturating_add(1);
        if self.count > self.limit || self.split.done {
            None
        } else if self.count == self.limit {
            // Last permitted piece: everything not yet consumed.
            Some(&self.split.finder.haystack()[..self.split.last])
        } else {
            self.split.next()
        }
    }
}
#[derive(Clone, Debug)]
pub struct Lines<'a> {
it: LinesWithTerminator<'a>,
}
impl<'a> Lines<'a> {
fn new(bytes: &'a [u8]) -> Lines<'a> {
Lines { it: LinesWithTerminator::new(bytes) }
}
pub fn as_bytes(&self) -> &'a [u8] {
self.it.bytes
}
}
impl<'a> Iterator for Lines<'a> {
type Item = &'a [u8];
#[inline]
fn next(&mut self) -> Option<&'a [u8]> {
Some(trim_last_terminator(self.it.next()?))
}
}
impl<'a> DoubleEndedIterator for Lines<'a> {
#[inline]
fn next_back(&mut self) -> Option<Self::Item> {
Some(trim_last_terminator(self.it.next_back()?))
}
}
impl<'a> iter::FusedIterator for Lines<'a> {}
/// Iterator over the lines of a byte slice, each yielded together with its
/// terminating `\n` when it has one. Created by
/// `ByteSlice::lines_with_terminator`.
#[derive(Clone, Debug)]
pub struct LinesWithTerminator<'a> {
    /// The bytes that have not been yielded yet.
    bytes: &'a [u8],
}

impl<'a> LinesWithTerminator<'a> {
    /// Starts iterating over the lines of `bytes`.
    fn new(bytes: &'a [u8]) -> LinesWithTerminator<'a> {
        LinesWithTerminator { bytes }
    }

    /// Returns the bytes that have not been consumed yet.
    pub fn as_bytes(&self) -> &'a [u8] {
        self.bytes
    }
}

impl<'a> Iterator for LinesWithTerminator<'a> {
    type Item = &'a [u8];

    /// Returns the next line from the front: everything up to and including
    /// the first `\n`, or all remaining bytes when there is no `\n`.
    #[inline]
    fn next(&mut self) -> Option<&'a [u8]> {
        let line = match self.bytes.find_byte(b'\n') {
            Some(newline) => {
                let (line, rest) = self.bytes.split_at(newline + 1);
                self.bytes = rest;
                line
            }
            None if self.bytes.is_empty() => return None,
            None => {
                // Final line without a terminator.
                let line = self.bytes;
                self.bytes = b"";
                line
            }
        };
        Some(line)
    }
}

impl<'a> DoubleEndedIterator for LinesWithTerminator<'a> {
    /// Returns the next line from the back.
    ///
    /// The very last byte is excluded from the search so that a trailing
    /// `\n` is treated as the terminator of the final line instead of as the
    /// start of an extra empty one.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        let last_index = self.bytes.len().checked_sub(1)?;
        match self.bytes[..last_index].rfind_byte(b'\n') {
            Some(newline) => {
                let (rest, line) = self.bytes.split_at(newline + 1);
                self.bytes = rest;
                Some(line)
            }
            None => {
                // Only one line is left; hand out all of it.
                let line = self.bytes;
                self.bytes = b"";
                Some(line)
            }
        }
    }
}

impl<'a> iter::FusedIterator for LinesWithTerminator<'a> {}
/// Removes one trailing line terminator from `s`.
///
/// A final `\n` is stripped, together with the `\r` directly before it if
/// there is one. Input that does not end in `\n` is returned unchanged; in
/// particular a lone trailing `\r` is kept.
fn trim_last_terminator(s: &[u8]) -> &[u8] {
    match s.strip_suffix(b"\n") {
        None => s,
        // Only look for `\r` once a `\n` has actually been removed.
        Some(line) => line.strip_suffix(b"\r").unwrap_or(line),
    }
}
#[cfg(all(test, feature = "std"))]
mod tests {
use alloc::{string::String, vec::Vec};
use crate::{
ext_slice::{ByteSlice, Lines, LinesWithTerminator, B},
tests::LOSSY_TESTS,
};
/// Checks both lossy conversions against the shared `LOSSY_TESTS` table,
/// and checks that the standard library's lossy conversion agrees with the
/// same expectations.
#[test]
fn to_str_lossy() {
    for (i, &(expected, input)) in LOSSY_TESTS.iter().enumerate() {
        let want = expected.as_bytes();

        // Conversion that returns a `Cow`.
        let converted = B(input).to_str_lossy();
        assert_eq!(
            want,
            converted.as_bytes(),
            "to_str_lossy(ith: {:?}, given: {:?})",
            i,
            input,
        );

        // Conversion that appends to a caller-provided `String`.
        let mut appended = String::new();
        B(input).to_str_lossy_into(&mut appended);
        assert_eq!(want, appended.as_bytes(), "to_str_lossy_into",);

        // The standard library as a reference implementation.
        let reference = String::from_utf8_lossy(input);
        assert_eq!(want, reference.as_bytes(), "std");
    }
}
/// Checks `Lines` and `LinesWithTerminator` in both directions over a table
/// of inputs covering empty input, missing final terminators, `\r\n`, and
/// consecutive empty lines.
#[test]
fn lines_iteration() {
// `t!(iterator expression, expected forward items)`.
//
// The iterator expression is expanded twice, so each direction runs on a
// fresh iterator: once collected forwards, once collected through `rev()`
// and compared against the reversed expectation.
macro_rules! t {
($it:expr, $forward:expr) => {
let mut res: Vec<&[u8]> = Vec::from($forward);
assert_eq!($it.collect::<Vec<_>>(), res);
res.reverse();
assert_eq!($it.rev().collect::<Vec<_>>(), res);
};
}
// Empty input yields no lines at all.
t!(Lines::new(b""), []);
t!(LinesWithTerminator::new(b""), []);
// A lone terminator is one empty line.
t!(Lines::new(b"\n"), [B("")]);
t!(Lines::new(b"\r\n"), [B("")]);
t!(LinesWithTerminator::new(b"\n"), [B("\n")]);
// Input without a final terminator still yields its last line.
t!(Lines::new(b"a"), [B("a")]);
t!(LinesWithTerminator::new(b"a"), [B("a")]);
t!(Lines::new(b"abc"), [B("abc")]);
t!(LinesWithTerminator::new(b"abc"), [B("abc")]);
// A final terminator does not produce an extra empty line.
t!(Lines::new(b"abc\n"), [B("abc")]);
t!(Lines::new(b"abc\r\n"), [B("abc")]);
t!(LinesWithTerminator::new(b"abc\n"), [B("abc\n")]);
// Consecutive terminators produce empty lines in between.
t!(Lines::new(b"abc\n\n"), [B("abc"), B("")]);
t!(LinesWithTerminator::new(b"abc\n\n"), [B("abc\n"), B("\n")]);
t!(Lines::new(b"abc\n\ndef"), [B("abc"), B(""), B("def")]);
t!(
LinesWithTerminator::new(b"abc\n\ndef"),
[B("abc\n"), B("\n"), B("def")]
);
t!(Lines::new(b"abc\n\ndef\n"), [B("abc"), B(""), B("def")]);
t!(
LinesWithTerminator::new(b"abc\n\ndef\n"),
[B("abc\n"), B("\n"), B("def\n")]
);
// Leading terminators produce leading empty lines.
t!(Lines::new(b"\na\nb\n"), [B(""), B("a"), B("b")]);
t!(
LinesWithTerminator::new(b"\na\nb\n"),
[B("\n"), B("a\n"), B("b\n")]
);
t!(Lines::new(b"\n\n\n"), [B(""), B(""), B("")]);
t!(LinesWithTerminator::new(b"\n\n\n"), [B("\n"), B("\n"), B("\n")]);
}
}