#![forbid(unsafe_code)]
use std::borrow::Cow;
use crate::EnvError;
use crate::native_int_str::NativeCharInt;
use crate::native_int_str::NativeIntStr;
use crate::native_int_str::NativeIntString;
use crate::native_int_str::from_native_int_representation;
use crate::string_expander::StringExpander;
use crate::string_parser::StringParser;
use crate::variable_parser::VariableParser;
/// Escape introducer.
const BACKSLASH: char = '\u{5C}';
/// Delimiter of a double-quoted section.
const DOUBLE_QUOTES: char = '"';
/// Delimiter of a single-quoted section.
const SINGLE_QUOTES: char = '\u{27}';
/// Line feed; ends a comment and, after a backslash, is skipped by the
/// backslash handlers of this file.
const NEW_LINE: char = '\u{0A}';
/// Introduces a variable reference in unquoted and double-quoted text.
const DOLLAR: char = '$';

/// `(escape letter, resulting character)` pairs.
///
/// `check_and_replace_ascii_escape_code` looks up the character following a
/// backslash in the first column and emits the second column instead.
const REPLACEMENTS: [(char, char); 9] = [
    ('r', '\u{0D}'),
    ('n', '\u{0A}'),
    ('t', '\u{09}'),
    ('f', '\u{0C}'),
    ('v', '\u{0B}'),
    ('_', '\u{20}'),
    ('#', '#'),
    ('$', '$'),
    ('"', '"'),
];

/// Characters that separate words when they appear unquoted.
const ASCII_WHITESPACE_CHARS: [char; 6] = [
    '\u{20}', // space
    '\u{09}', // horizontal tab
    '\u{0D}', // carriage return
    '\u{0A}', // line feed
    '\u{0B}', // vertical tab
    '\u{0C}', // form feed
];
/// State machine that splits a native-int string into words.
///
/// Despite the name, this type does not implement `Iterator` in this file;
/// it is consumed by `split`, which returns all collected words at once.
pub struct SplitIterator<'a> {
/// Wraps the input being read; characters for the word currently being
/// assembled are pushed into it and later fetched with
/// `take_collected_output`.
expander: StringExpander<'a>,
/// Finished words, in the order they were completed.
words: Vec<Vec<NativeCharInt>>,
}
impl<'a> SplitIterator<'a> {
/// Creates a splitter over `s` with an empty list of collected words.
pub fn new(s: &'a NativeIntStr) -> Self {
    let expander = StringExpander::new(s);
    let words = Vec::new();
    Self { expander, words }
}
/// Consumes input through the parser's `consume_one_ascii_or_all_non_ascii`
/// and throws the consumed data away, so nothing is added to the current word.
///
/// # Errors
/// Propagates the parser's error, converted into `EnvError`.
fn skip_one(&mut self) -> Result<(), EnvError> {
    let parser = self.expander.get_parser_mut();
    parser.consume_one_ascii_or_all_non_ascii()?;
    Ok(())
}
/// Delegates to the expander's `take_one` (presumably moving the current
/// input character into the word being built; confirm in `StringExpander`).
///
/// # Errors
/// Propagates the expander's error, converted into `EnvError`.
fn take_one(&mut self) -> Result<(), EnvError> {
    self.expander.take_one()?;
    Ok(())
}
/// Returns the character reported by the expander's `peek`, or `None` when
/// `peek` fails. The state functions treat `None` as end of input.
fn get_current_char(&self) -> Option<char> {
    let peeked = self.expander.peek();
    peeked.ok()
}
fn push_char_to_word(&mut self, c: char) {
self.expander.put_one_char(c);
}
/// Takes whatever the expander has collected so far and appends it to
/// `words` as one finished word.
fn push_word_to_words(&mut self) {
    self.words.push(self.expander.take_collected_output());
}
/// Shared access to the parser owned by the expander.
fn get_parser(&self) -> &StringParser<'a> {
    let expander = &self.expander;
    expander.get_parser()
}
/// Exclusive access to the parser owned by the expander.
fn get_parser_mut(&mut self) -> &mut StringParser<'a> {
    let expander = &mut self.expander;
    expander.get_parser_mut()
}
/// Parses a variable reference at the current position and appends its
/// expansion to the current word.
///
/// The variable is looked up in the process environment. If it is set, its
/// value is appended. If it is unset and the reference carried a default,
/// the default is appended. If it is unset with no default, nothing is
/// appended.
///
/// # Errors
/// Propagates the error of `VariableParser::parse_variable`, converted into
/// `EnvError`.
fn substitute_variable<'x>(&'x mut self) -> Result<(), EnvError> {
    let mut variable_parser = VariableParser::<'a, '_> {
        parser: self.get_parser_mut(),
    };
    let (name, default) = variable_parser.parse_variable()?;

    let env_key = from_native_int_representation(Cow::Borrowed(name));
    let looked_up = std::env::var_os(env_key);

    if let Some(value) = &looked_up {
        // A set variable always wins over the default.
        self.expander.put_string(value);
    } else if let Some(fallback) = default {
        self.expander.put_native_string(fallback);
    }
    // Unset and no default: the reference expands to nothing.

    Ok(())
}
/// Handles the escape letter `c` if it is listed in `REPLACEMENTS`.
///
/// On a hit the input is skipped and the mapped character is pushed to the
/// current word, and `Ok(true)` is returned. On a miss nothing is consumed
/// and `Ok(false)` is returned.
///
/// # Errors
/// Propagates a failure of `skip_one`.
fn check_and_replace_ascii_escape_code(&mut self, c: char) -> Result<bool, EnvError> {
    match REPLACEMENTS.iter().find(|entry| entry.0 == c) {
        Some(&(_, replacement)) => {
            self.skip_one()?;
            self.push_char_to_word(replacement);
            Ok(true)
        }
        None => Ok(false),
    }
}
/// Builds the "invalid sequence `\c`" error for the escape character `c`,
/// tagged with the parser's current peek position.
fn make_invalid_sequence_backslash_xin_minus_s(&self, c: char) -> EnvError {
    let position = self.expander.get_parser().get_peek_position();
    EnvError::EnvInvalidSequenceBackslashXInMinusS(position, c)
}
/// Top-level driver: runs the delimiter state until the input is exhausted
/// or a real error occurs.
///
/// `EnvContinueWithDelimiter` and `EnvReachedEnd` are used by the other
/// states as control-flow signals, not failures. The former restarts the
/// delimiter state and the latter is turned into `Ok(())`. Every other result
/// is returned unchanged.
fn state_root(&mut self) -> Result<(), EnvError> {
    loop {
        match self.state_delimiter() {
            Err(EnvError::EnvContinueWithDelimiter) => continue,
            Err(EnvError::EnvReachedEnd) => break Ok(()),
            other => break other,
        }
    }
}
/// Between-words state: skips whitespace and comments and dispatches to the
/// state that starts the next word.
///
/// Returns `Ok(())` once no character is left. Signals and errors from the
/// sub-states are propagated to the caller.
fn state_delimiter(&mut self) -> Result<(), EnvError> {
    while let Some(current) = self.get_current_char() {
        match current {
            '#' => {
                self.skip_one()?;
                self.state_comment()?;
            }
            BACKSLASH => {
                self.skip_one()?;
                self.state_delimiter_backslash()?;
            }
            c if ASCII_WHITESPACE_CHARS.contains(&c) => self.skip_one()?,
            // First character of a word: leave it in place, the unquoted
            // state consumes it.
            _ => self.state_unquoted()?,
        }
    }
    Ok(())
}
/// Handles the character following a backslash met between words.
///
/// * `\_` and backslash-newline are skipped (still between words).
/// * `\$`, `\\`, `\#`, `\'`, `\"` start a word with that literal character.
/// * `\c` stops processing by returning `EnvReachedEnd`.
/// * Any other letter in `REPLACEMENTS` starts a word with its replacement.
/// * Anything else is an invalid-sequence error.
///
/// # Errors
/// A backslash at end of input yields
/// `EnvInvalidBackslashAtEndOfStringInMinusS`.
fn state_delimiter_backslash(&mut self) -> Result<(), EnvError> {
    let escaped = match self.get_current_char() {
        Some(c) => c,
        None => {
            return Err(EnvError::EnvInvalidBackslashAtEndOfStringInMinusS(
                self.get_parser().get_peek_position(),
                "Delimiter".into(),
            ));
        }
    };

    match escaped {
        '_' | NEW_LINE => self.skip_one(),
        DOLLAR | BACKSLASH | '#' | SINGLE_QUOTES | DOUBLE_QUOTES => {
            self.take_one()?;
            self.state_unquoted()
        }
        'c' => Err(EnvError::EnvReachedEnd),
        other => {
            if self.check_and_replace_ascii_escape_code(other)? {
                self.state_unquoted()
            } else {
                Err(self.make_invalid_sequence_backslash_xin_minus_s(other))
            }
        }
    }
}
/// Inside-a-word state for unquoted text.
///
/// Collects characters into the current word, expanding `$` references and
/// entering the quoted or backslash states as needed. Unquoted whitespace
/// finishes the word and returns `Ok(())`. Running out of input also
/// finishes the word, but returns the `EnvReachedEnd` signal.
fn state_unquoted(&mut self) -> Result<(), EnvError> {
    while let Some(current) = self.get_current_char() {
        match current {
            DOLLAR => self.substitute_variable()?,
            SINGLE_QUOTES => {
                self.skip_one()?;
                self.state_single_quoted()?;
            }
            DOUBLE_QUOTES => {
                self.skip_one()?;
                self.state_double_quoted()?;
            }
            BACKSLASH => {
                self.skip_one()?;
                self.state_unquoted_backslash()?;
            }
            c if ASCII_WHITESPACE_CHARS.contains(&c) => {
                // The word is stored before the separator is consumed.
                self.push_word_to_words();
                self.skip_one()?;
                return Ok(());
            }
            _ => self.take_one()?,
        }
    }

    // End of input terminates the word that was being built.
    self.push_word_to_words();
    Err(EnvError::EnvReachedEnd)
}
/// Handles the character following a backslash inside an unquoted word.
///
/// * Backslash-newline is skipped.
/// * `\_` ends the current word and returns `EnvContinueWithDelimiter`.
/// * `\c` ends the current word and returns `EnvReachedEnd`.
/// * `\$`, `\\`, `\'`, `\"` add the literal character to the word.
/// * Any other letter in `REPLACEMENTS` adds its replacement.
/// * Anything else is an invalid-sequence error.
///
/// # Errors
/// A backslash at end of input yields
/// `EnvInvalidBackslashAtEndOfStringInMinusS`.
fn state_unquoted_backslash(&mut self) -> Result<(), EnvError> {
    let escaped = match self.get_current_char() {
        Some(c) => c,
        None => {
            return Err(EnvError::EnvInvalidBackslashAtEndOfStringInMinusS(
                self.get_parser().get_peek_position(),
                "Unquoted".into(),
            ));
        }
    };

    match escaped {
        NEW_LINE => self.skip_one(),
        '_' => {
            self.skip_one()?;
            self.push_word_to_words();
            Err(EnvError::EnvContinueWithDelimiter)
        }
        'c' => {
            self.push_word_to_words();
            Err(EnvError::EnvReachedEnd)
        }
        DOLLAR | BACKSLASH | SINGLE_QUOTES | DOUBLE_QUOTES => self.take_one(),
        other => {
            if self.check_and_replace_ascii_escape_code(other)? {
                Ok(())
            } else {
                Err(self.make_invalid_sequence_backslash_xin_minus_s(other))
            }
        }
    }
}
/// Inside-single-quotes state; the opening quote has already been skipped.
///
/// Characters are added to the current word until the closing quote, which
/// is skipped. A backslash is delegated to `split_single_quoted_backslash`.
/// `$` is not treated specially here.
///
/// # Errors
/// `EnvMissingClosingQuote` if the input ends before the closing quote.
fn state_single_quoted(&mut self) -> Result<(), EnvError> {
    while let Some(current) = self.get_current_char() {
        match current {
            SINGLE_QUOTES => {
                self.skip_one()?;
                return Ok(());
            }
            BACKSLASH => {
                self.skip_one()?;
                self.split_single_quoted_backslash()?;
            }
            _ => self.take_one()?,
        }
    }

    let position = self.get_parser().get_peek_position();
    Err(EnvError::EnvMissingClosingQuote(position, '\''))
}
/// Handles the character following a backslash inside single quotes.
///
/// * Backslash-newline is skipped.
/// * `\'` and `\\` add the literal character to the word.
/// * A letter listed in `REPLACEMENTS` is NOT expanded: both the backslash
///   and the letter are added to the word unchanged.
/// * Anything else is an invalid-sequence error.
///
/// # Errors
/// `EnvMissingClosingQuote` if the input ends right after the backslash.
fn split_single_quoted_backslash(&mut self) -> Result<(), EnvError> {
    let escaped = match self.get_current_char() {
        Some(c) => c,
        None => {
            let position = self.get_parser().get_peek_position();
            return Err(EnvError::EnvMissingClosingQuote(position, '\''));
        }
    };

    match escaped {
        NEW_LINE => self.skip_one(),
        SINGLE_QUOTES | BACKSLASH => self.take_one(),
        other => {
            let is_known_escape = REPLACEMENTS.iter().any(|entry| entry.0 == other);
            if is_known_escape {
                // Keep the sequence verbatim: re-emit the backslash that was
                // already skipped, then copy the letter.
                self.push_char_to_word(BACKSLASH);
                self.take_one()
            } else {
                Err(self.make_invalid_sequence_backslash_xin_minus_s(other))
            }
        }
    }
}
/// Inside-double-quotes state; the opening quote has already been skipped.
///
/// Characters are added to the current word until the closing quote, which
/// is skipped. `$` references are expanded and a backslash is delegated to
/// `state_double_quoted_backslash`.
///
/// # Errors
/// `EnvMissingClosingQuote` if the input ends before the closing quote.
fn state_double_quoted(&mut self) -> Result<(), EnvError> {
    while let Some(current) = self.get_current_char() {
        match current {
            DOLLAR => self.substitute_variable()?,
            DOUBLE_QUOTES => {
                self.skip_one()?;
                return Ok(());
            }
            BACKSLASH => {
                self.skip_one()?;
                self.state_double_quoted_backslash()?;
            }
            _ => self.take_one()?,
        }
    }

    let position = self.get_parser().get_peek_position();
    Err(EnvError::EnvMissingClosingQuote(position, '"'))
}
/// Handles the character following a backslash inside double quotes.
///
/// * Backslash-newline is skipped.
/// * `\"`, `\$` and `\\` add the literal character to the word.
/// * `\c` is rejected with `EnvBackslashCNotAllowedInDoubleQuotes`.
/// * Any other letter in `REPLACEMENTS` adds its replacement.
/// * Anything else is an invalid-sequence error.
///
/// # Errors
/// `EnvMissingClosingQuote` if the input ends right after the backslash.
fn state_double_quoted_backslash(&mut self) -> Result<(), EnvError> {
    let escaped = match self.get_current_char() {
        Some(c) => c,
        None => {
            let position = self.get_parser().get_peek_position();
            return Err(EnvError::EnvMissingClosingQuote(position, '"'));
        }
    };

    match escaped {
        NEW_LINE => self.skip_one(),
        DOUBLE_QUOTES | DOLLAR | BACKSLASH => self.take_one(),
        'c' => {
            let position = self.get_parser().get_peek_position();
            Err(EnvError::EnvBackslashCNotAllowedInDoubleQuotes(position))
        }
        other => {
            if self.check_and_replace_ascii_escape_code(other)? {
                Ok(())
            } else {
                Err(self.make_invalid_sequence_backslash_xin_minus_s(other))
            }
        }
    }
}
/// Comment state; the leading `#` has already been skipped.
///
/// Discards input up to and including the next newline and returns `Ok(())`.
/// If the input ends first, returns the `EnvReachedEnd` signal.
fn state_comment(&mut self) -> Result<(), EnvError> {
    while let Some(current) = self.get_current_char() {
        if current == NEW_LINE {
            self.skip_one()?;
            return Ok(());
        }
        // Jump ahead; the loop re-checks whether a newline or the end of
        // the input was reached.
        self.get_parser_mut().skip_until_char_or_end(NEW_LINE);
    }
    Err(EnvError::EnvReachedEnd)
}
/// Runs the state machine over the whole input and returns the collected
/// words, consuming the splitter.
///
/// # Errors
/// Returns the first error reported by `state_root`.
pub fn split(mut self) -> Result<Vec<NativeIntString>, EnvError> {
    match self.state_root() {
        Ok(()) => Ok(self.words),
        Err(error) => Err(error),
    }
}
}
/// Splits `s` into words by running a fresh [`SplitIterator`] over it.
///
/// # Errors
/// Returns whatever error [`SplitIterator::split`] reports.
pub fn split(s: &NativeIntStr) -> Result<Vec<NativeIntString>, EnvError> {
    SplitIterator::new(s).split()
}