[go: up one dir, main page]

gix-pathspec 0.4.1

A crate of the gitoxide project dealing with magical pathspecs
Documentation
use std::borrow::Cow;

use bstr::{BStr, BString, ByteSlice, ByteVec};

use crate::{Defaults, MagicSignature, Pattern, SearchMode};

/// The error returned by [parse()][crate::parse()].
///
/// Each variant corresponds to one way in which a pathspec, or the magic signature at its start, can be malformed.
#[derive(thiserror::Error, Debug)]
#[allow(missing_docs)]
pub enum Error {
    /// The input to parse had no bytes at all.
    #[error("An empty string is not a valid pathspec")]
    EmptyString,
    /// A long keyword inside the parentheses of the signature was not recognized; `keyword` holds the offending bytes.
    #[error("Found {keyword:?} in signature, which is not a valid keyword")]
    InvalidKeyword { keyword: BString },
    /// A short keyword was one of the characters which are rejected as unimplemented.
    #[error("Unimplemented short keyword: {short_keyword:?}")]
    Unimplemented { short_keyword: char },
    /// Long keywords were opened with `(`, but the input contains no `)`.
    #[error("Missing ')' at the end of pathspec signature")]
    MissingClosingParenthesis,
    /// The attribute parser rejected the specification following `attr:`; `attribute` is taken from its error.
    #[error("Attribute has non-ascii characters or starts with '-': {attribute:?}")]
    InvalidAttribute { attribute: BString },
    /// An attribute value contained a byte that is neither ASCII-alphanumeric nor one of `,`, `-` or `_`.
    #[error("Invalid character in attribute value: {character:?}")]
    InvalidAttributeValue { character: char },
    /// An attribute value ended with a backslash that has nothing left to escape.
    #[error("Escape character '\\' is not allowed as the last character in an attribute value")]
    TrailingEscapeCharacter,
    /// The `attr:` keyword was not followed by any attribute.
    #[error("Attribute specification cannot be empty")]
    EmptyAttribute,
    /// An `attr:` keyword was encountered even though attributes were already set by a previous one.
    #[error("Only one attribute specification is allowed in the same pathspec")]
    MultipleAttributeSpecifications,
    /// Both the `literal` and the `glob` keyword were given.
    #[error("'literal' and 'glob' keywords cannot be used together in the same pathspec")]
    IncompatibleSearchModes,
}

impl Pattern {
    /// Try to parse a path-spec pattern from the given `input` bytes.
    pub fn from_bytes(
        input: &[u8],
        Defaults {
            signature,
            search_mode,
            literal,
        }: Defaults,
    ) -> Result<Self, Error> {
        if input.is_empty() {
            return Err(Error::EmptyString);
        }
        if literal {
            return Ok(Self::from_literal(input, signature));
        }
        if input.as_bstr() == ":" {
            return Ok(Pattern {
                nil: true,
                ..Default::default()
            });
        }

        let mut p = Pattern {
            signature,
            search_mode: SearchMode::default(),
            ..Default::default()
        };

        let mut cursor = 0;
        if input.first() == Some(&b':') {
            cursor += 1;
            p.signature |= parse_short_keywords(input, &mut cursor)?;
            if let Some(b'(') = input.get(cursor) {
                cursor += 1;
                parse_long_keywords(input, &mut p, &mut cursor)?;
            }
        }

        if search_mode != Default::default() && p.search_mode == Default::default() {
            p.search_mode = search_mode;
        }
        let mut path = &input[cursor..];
        if path.last() == Some(&b'/') {
            p.signature |= MagicSignature::MUST_BE_DIR;
            path = &path[..path.len() - 1];
        }
        p.path = path.into();
        Ok(p)
    }

    /// Take `input` literally without parsing anything. This will also set our mode to `literal` to allow this pathspec to match `input` verbatim, and
    /// use `default_signature` as magic signature.
    pub fn from_literal(input: &[u8], default_signature: MagicSignature) -> Self {
        Pattern {
            path: input.into(),
            signature: default_signature,
            search_mode: SearchMode::Literal,
            ..Default::default()
        }
    }
}

/// Consume short magic keywords from `input`, starting at `cursor`, and return the flags they stand for.
///
/// `/` sets `TOP`, while `^` and `!` set `EXCLUDE`. A `:` ends the keywords and is consumed, whereas any
/// other byte ends them without being consumed. On success, `cursor` points to the first byte that
/// wasn't consumed.
///
/// Bytes in the set of unimplemented short keywords cause [`Error::Unimplemented`].
fn parse_short_keywords(input: &[u8], cursor: &mut usize) -> Result<MagicSignature, Error> {
    const UNIMPLEMENTED: &[u8] = b"\"#%&'-',;<=>@_`~";

    let mut flags = MagicSignature::empty();
    // `*cursor` is advanced in lock-step with the iterator for every byte that is consumed.
    for &byte in input.iter().skip(*cursor) {
        let flag = match byte {
            b'/' => MagicSignature::TOP,
            b'^' | b'!' => MagicSignature::EXCLUDE,
            b':' => {
                *cursor += 1;
                break;
            }
            _ if UNIMPLEMENTED.contains(&byte) => {
                *cursor += 1;
                return Err(Error::Unimplemented {
                    short_keyword: char::from(byte),
                });
            }
            // This byte is not ours, so leave it for the caller to deal with.
            _ => break,
        };
        flags |= flag;
        *cursor += 1;
    }

    Ok(flags)
}

fn parse_long_keywords(input: &[u8], p: &mut Pattern, cursor: &mut usize) -> Result<(), Error> {
    let end = input.find(")").ok_or(Error::MissingClosingParenthesis)?;

    let input = &input[*cursor..end];
    *cursor = end + 1;

    if input.is_empty() {
        return Ok(());
    }

    split_on_non_escaped_char(input, b',', |keyword| {
        let attr_prefix = b"attr:";
        match keyword {
            b"attr" => {}
            b"top" => p.signature |= MagicSignature::TOP,
            b"icase" => p.signature |= MagicSignature::ICASE,
            b"exclude" => p.signature |= MagicSignature::EXCLUDE,
            b"literal" => match p.search_mode {
                SearchMode::PathAwareGlob => return Err(Error::IncompatibleSearchModes),
                _ => p.search_mode = SearchMode::Literal,
            },
            b"glob" => match p.search_mode {
                SearchMode::Literal => return Err(Error::IncompatibleSearchModes),
                _ => p.search_mode = SearchMode::PathAwareGlob,
            },
            _ if keyword.starts_with(attr_prefix) => {
                if p.attributes.is_empty() {
                    p.attributes = parse_attributes(&keyword[attr_prefix.len()..])?;
                } else {
                    return Err(Error::MultipleAttributeSpecifications);
                }
            }
            _ => {
                return Err(Error::InvalidKeyword {
                    keyword: BString::from(keyword),
                });
            }
        };
        Ok(())
    })
}

/// Call `f` with each piece of `input` obtained by splitting it at every `split_char` that isn't
/// directly preceded by a backslash, stopping at the first error returned by `f`.
///
/// The separators themselves are not part of the pieces, and `f` is always called with the
/// remainder after the last separator, even if it is empty.
///
/// Note that a `split_char` in the very first position has no preceding byte to look at and is
/// thus never treated as separator.
fn split_on_non_escaped_char(
    input: &[u8],
    split_char: u8,
    mut f: impl FnMut(&[u8]) -> Result<(), Error>,
) -> Result<(), Error> {
    let mut piece_start = 0;
    for (idx, pair) in input.windows(2).enumerate() {
        // `pair[1]` is located at `idx + 1` within `input`.
        let candidate = idx + 1;
        if pair[1] == split_char && pair[0] != b'\\' {
            f(&input[piece_start..candidate])?;
            piece_start = candidate + 1;
        }
    }
    f(&input[piece_start..])
}

fn parse_attributes(input: &[u8]) -> Result<Vec<gix_attributes::Assignment>, Error> {
    if input.is_empty() {
        return Err(Error::EmptyAttribute);
    }

    let unescaped = unescape_attribute_values(input.into())?;

    gix_attributes::parse::Iter::new(unescaped.as_bstr())
        .map(|res| res.map(gix_attributes::AssignmentRef::to_owned))
        .collect::<Result<Vec<_>, _>>()
        .map_err(|e| Error::InvalidAttribute { attribute: e.attribute })
}

fn unescape_attribute_values(input: &BStr) -> Result<Cow<'_, BStr>, Error> {
    if !input.contains(&b'=') {
        return Ok(Cow::Borrowed(input));
    }

    let mut out: Cow<'_, BStr> = Cow::Borrowed("".into());

    for attr in input.split(|&c| c == b' ') {
        let split_point = attr.find_byte(b'=').map_or_else(|| attr.len(), |i| i + 1);
        let (name, value) = attr.split_at(split_point);

        if value.contains(&b'\\') {
            let out = out.to_mut();
            out.push_str(name);
            out.push_str(unescape_and_check_attr_value(value.into())?);
            out.push(b' ');
        } else {
            check_attribute_value(value.as_bstr())?;
            match out {
                Cow::Borrowed(_) => {
                    let end = out.len() + attr.len() + 1;
                    out = Cow::Borrowed(&input[0..end.min(input.len())]);
                }
                Cow::Owned(_) => {
                    let out = out.to_mut();
                    out.push_str(name);
                    out.push_str(value);
                    out.push(b' ');
                }
            }
        }
    }

    Ok(out)
}

fn unescape_and_check_attr_value(value: &BStr) -> Result<BString, Error> {
    let mut out = BString::from(Vec::with_capacity(value.len()));
    let mut bytes = value.iter();
    while let Some(mut b) = bytes.next().copied() {
        if b == b'\\' {
            b = *bytes.next().ok_or(Error::TrailingEscapeCharacter)?;
        }

        out.push(validated_attr_value_byte(b)?);
    }
    Ok(out)
}

fn check_attribute_value(input: &BStr) -> Result<(), Error> {
    match input.iter().copied().find(|b| !is_valid_attr_value(*b)) {
        Some(b) => Err(Error::InvalidAttributeValue { character: b as char }),
        None => Ok(()),
    }
}

/// Return `true` if `byte` may be part of an attribute value, which is the case for
/// ASCII letters and digits as well as `,`, `-` and `_`.
fn is_valid_attr_value(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b',' | b'-' | b'_')
}

fn validated_attr_value_byte(byte: u8) -> Result<u8, Error> {
    if is_valid_attr_value(byte) {
        Ok(byte)
    } else {
        Err(Error::InvalidAttributeValue {
            character: byte as char,
        })
    }
}