[go: up one dir, main page]

ron 0.1.0

Rusty Object Notation
Documentation
use std::fmt::{Display, Formatter, Result as FmtResult};
use std::ops::Neg;
use std::str::{FromStr, from_utf8, from_utf8_unchecked};

use de::{Error, ParseError, Result};

/// ASCII decimal digits; the byte class accepted by the integer parsers.
const DIGITS: &[u8] = b"0123456789";
/// Bytes that may appear in a float literal (digits, `.`, sign and exponent markers).
const FLOAT_CHARS: &[u8] = b"0123456789.+-eE";
/// Bytes allowed as the first byte of an identifier (ASCII letters and `_`).
const IDENT_FIRST: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_";
/// Bytes allowed anywhere in an identifier (ASCII letters, `_` and digits).
const IDENT_CHAR: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_0123456789";
/// Bytes skipped as whitespace by `Bytes::skip_ws`.
const WHITE_SPACE: &[u8] = b"\n\t\r ";

/// A cursor over the raw input bytes that also tracks the current line and
/// column so that errors can be reported with a `Position`.
#[derive(Clone, Copy, Debug)]
pub struct Bytes<'a> {
    /// The input that has not been consumed yet; consuming shrinks it from the front.
    bytes: &'a [u8],
    /// Column of the next byte; starts at 1, grows by one per consumed byte
    /// and is reset to 1 after a `\n` is consumed.
    column: usize,
    /// Line of the next byte; starts at 1 and grows by one per consumed `\n`.
    line: usize,
}

impl<'a> Bytes<'a> {
    pub fn new(bytes: &'a [u8]) -> Self {
        Bytes {
            bytes,
            column: 1,
            line: 1,
        }
    }

    pub fn advance(&mut self, bytes: usize) -> Result<()> {
        for _ in 0..bytes {
            self.advance_single()?;
        }

        Ok(())
    }

    pub fn advance_single(&mut self) -> Result<()> {
        if self.peek().ok_or(self.error(ParseError::Eof))? == b'\n' {
            self.line += 1;
            self.column = 1;
        } else {
            self.column += 1;
        }

        self.bytes = &self.bytes[1..];

        Ok(())
    }

    pub fn bool(&mut self) -> Result<bool> {
        if self.consume("true") {
            Ok(true)
        } else if self.consume("false") {
            Ok(false)
        } else {
            self.err(ParseError::ExpectedBoolean)
        }
    }

    pub fn bytes(&self) -> &[u8] {
        &self.bytes
    }

    pub fn char(&mut self) -> Result<char> {
        if !self.consume("'") {
            return self.err(ParseError::ExpectedChar);
        }

        let c = self.eat_byte()?;

        let c = if c == b'\\' {
            let c = self.eat_byte()?;

            if c != b'\\' && c != b'\'' {
                return self.err(ParseError::InvalidEscape);
            }

            c
        } else {
            c
        };

        if !self.consume("'") {
            return self.err(ParseError::ExpectedChar);
        }

        Ok(c as char)
    }

    pub fn comma(&mut self) -> bool {
        if self.consume(",") {
            self.skip_ws();

            true
        } else {
            false
        }
    }

    pub fn consume(&mut self, s: &str) -> bool {
        if s.bytes().enumerate().all(|(i, b)| self.bytes.get(i).map(|t| *t == b).unwrap_or(false)) {
            let _ = self.advance(s.len());

            true
        } else {
            false
        }
    }

    pub fn eat_byte(&mut self) -> Result<u8> {
        if let Some(peek) = self.peek() {
            let _ = self.advance_single();

            Ok(peek)
        } else {
            self.err(ParseError::Eof)
        }
    }

    pub fn err<T>(&self, kind: ParseError) -> Result<T> {
        Err(self.error(kind))
    }

    pub fn error(&self, kind: ParseError) -> Error {
        Error::Parser(kind, Position { line: self.line, col: self.column })
    }

    pub fn float<T>(&mut self) -> Result<T>
        where T: FromStr
    {
        let num_bytes = self.next_bytes_contained_in(FLOAT_CHARS);

        let s = unsafe { from_utf8_unchecked(&self.bytes[0..num_bytes]) };
        let res = FromStr::from_str(s).map_err(|_| self.error(ParseError::ExpectedFloat));

        let _ = self.advance(num_bytes);

        res
    }

    pub fn identifier(&mut self) -> Result<&[u8]> {
        if IDENT_FIRST.contains(&self.peek().ok_or(self.error(ParseError::Eof))?) {
            let bytes = self.next_bytes_contained_in(IDENT_CHAR);

            let ident = &self.bytes[..bytes];
            let _ = self.advance(bytes);

            Ok(ident)
        } else {
            self.err(ParseError::ExpectedIdentifier)
        }
    }

    pub fn next_bytes_contained_in(&self, allowed: &[u8]) -> usize {
        (0..self.bytes.len())
            .flat_map(|i| self.bytes.get(i))
            .take_while(|b| allowed.contains(b))
            .fold(0, |acc, _| acc + 1)
    }

    pub fn skip_ws(&mut self) {
        while self.peek().map(|c| WHITE_SPACE.contains(&c)).unwrap_or(false) {
            let _ = self.advance_single();
        }

        if self.skip_comment() {
            self.skip_ws();
        }
    }

    pub fn peek(&self) -> Option<u8> {
        self.bytes.get(0).map(|b| *b)
    }

    pub fn signed_integer<T>(&mut self) -> Result<T> where T: FromStr + Neg<Output=T> {
        match self.peek() {
            Some(b'+') => {
                let _ = self.advance_single();

                self.unsigned_integer()
            }
            Some(b'-') => {
                let _ = self.advance_single();

                self.unsigned_integer::<T>().map(Neg::neg)
            }
            Some(_) => self.unsigned_integer(),
            None => self.err(ParseError::Eof),
        }
    }

    pub fn string(&mut self) -> Result<ParsedStr> {
        if !self.consume("\"") {
            return self.err(ParseError::ExpectedString);
        }

        let (i, end_or_escape) = (0..)
            .flat_map(|i| self.bytes.get(i))
            .enumerate()
            .find(|&(_, &b)| b == b'\\' || b == b'"')
            .ok_or(self.error(ParseError::Eof))?;

        if *end_or_escape == b'"' {
            let s = from_utf8(&self.bytes[..i]).map_err(|e| self.error(e.into()))?;

            // Advance by the number of bytes of the string
            // + 1 for the `"`.
            let _ = self.advance(i + 1);

            Ok(ParsedStr::Slice(s))
        } else {
            let mut i = i;
            let mut s: Vec<_> = self.bytes[..i].to_vec();

            loop {
                let _ = self.advance(i + 1);
                self.parse_str_escape(&mut s)?;

                let (new_i, end_or_escape) = (0..)
                    .flat_map(|i| self.bytes.get(i))
                    .enumerate()
                    .find(|&(_, &b)| b == b'\\' || b == b'"')
                    .ok_or(ParseError::Eof)
                    .map_err(|e| self.error(e))?;

                i = new_i;
                s.extend_from_slice(&self.bytes[..i]);

                if *end_or_escape == b'"' {
                    let _ = self.advance(i + 1);

                    break Ok(ParsedStr::Allocated(String::from_utf8(s)
                        .map_err(|e| self.error(e.into()))?));
                }
            }
        }
    }

    pub fn unsigned_integer<T>(&mut self) -> Result<T> where T: FromStr {
        let num_bytes = self.next_bytes_contained_in(DIGITS);

        if num_bytes == 0 {
            return self.err(ParseError::Eof);
        }

        let res = FromStr::from_str(unsafe { from_utf8_unchecked(&self.bytes[0..num_bytes]) })
            .map_err(|_| self.error(ParseError::ExpectedInteger));

        let _ = self.advance(num_bytes);

        res
    }

    fn decode_hex_escape(&mut self) -> Result<u16> {
        let mut n = 0;
        for _ in 0..4 {
            n = match self.eat_byte()? {
                c @ b'0' ... b'9' => n * 16_u16 + ((c as u16) - (b'0' as u16)),
                b'a' | b'A' => n * 16_u16 + 10_u16,
                b'b' | b'B' => n * 16_u16 + 11_u16,
                b'c' | b'C' => n * 16_u16 + 12_u16,
                b'd' | b'D' => n * 16_u16 + 13_u16,
                b'e' | b'E' => n * 16_u16 + 14_u16,
                b'f' | b'F' => n * 16_u16 + 15_u16,
                _ => {
                    return self.err(ParseError::InvalidEscape);
                }
            };
        }

        Ok(n)
    }

    fn parse_str_escape(&mut self, store: &mut Vec<u8>) -> Result<()> {
        use std::iter::repeat;

        match self.eat_byte()? {
            b'"' => store.push(b'"'),
            b'\\' => store.push(b'\\'),
            b'b' => store.push(b'\x08'),
            b'f' => store.push(b'\x0c'),
            b'n' => store.push(b'\n'),
            b'r' => store.push(b'\r'),
            b't' => store.push(b'\t'),
            b'u' => {
                let c: char = match self.decode_hex_escape()? {
                    0xDC00 ... 0xDFFF => {
                        return self.err(ParseError::InvalidEscape);
                    }

                    n1 @ 0xD800 ... 0xDBFF => {
                        if self.eat_byte()? != b'\\' {
                            return self.err(ParseError::InvalidEscape);
                        }

                        if self.eat_byte()? != b'u' {
                            return self.err(ParseError::InvalidEscape);
                        }

                        let n2 = self.decode_hex_escape()?;

                        if n2 < 0xDC00 || n2 > 0xDFFF {
                            return self.err(ParseError::InvalidEscape);
                        }

                        let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;

                        match ::std::char::from_u32(n as u32) {
                            Some(c) => c,
                            None => {
                                return self.err(ParseError::InvalidEscape);
                            }
                        }
                    }

                    n => {
                        match ::std::char::from_u32(n as u32) {
                            Some(c) => c,
                            None => {
                                return self.err(ParseError::InvalidEscape);
                            }
                        }
                    }
                };

                let char_start = store.len();
                store.extend(repeat(0).take(c.len_utf8()));
                c.encode_utf8(&mut store[char_start..]);
            }
            _ => {
                return self.err(ParseError::InvalidEscape);
            }
        }

        Ok(())
    }

    fn skip_comment(&mut self) -> bool {
        if self.consume("//") {
            let bytes = self.bytes.iter().take_while(|&&b| b != b'\n').count();

            let _ = self.advance(bytes);

            true
        } else {
            false
        }
    }
}

/// The result of parsing a string literal with `Bytes::string`.
#[derive(Clone, Debug)]
pub enum ParsedStr<'a> {
    /// The literal contained escape sequences, so its unescaped content had
    /// to be built in a new `String`.
    Allocated(String),
    /// The literal contained no escape sequences; this borrows the text
    /// between the quotes directly from the input.
    Slice(&'a str),
}

/// A location in the input, attached to parser errors.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Position {
    /// Column within the line.
    pub col: usize,
    /// Line within the input.
    pub line: usize,
}

impl Display for Position {
    /// Formats the position as `line:col`.
    fn fmt(&self, f: &mut Formatter) -> FmtResult {
        let Position { col, line } = *self;

        f.write_fmt(format_args!("{}:{}", line, col))
    }
}