pub mod attributes;
use std::borrow::Cow;
use std::str::from_utf8;
use std::ops::Deref;
use encoding_rs::Encoding;
use std::io::BufRead;
use escape::unescape;
use self::attributes::{Attributes, Attribute};
use errors::Result;
use reader::Reader;
/// Data of an element tag: the element name followed by whatever else the
/// tag contains (attributes are appended to the same buffer).
///
/// Carried by `Event::Start` and `Event::Empty`.
#[derive(Debug, Clone)]
pub struct BytesStart<'a> {
    /// Tag content; the element name occupies the first `name_len` bytes.
    buf: Cow<'a, [u8]>,
    /// Length in bytes of the element name at the start of `buf`.
    name_len: usize,
}
impl<'a> BytesStart<'a> {
    /// Creates a `BytesStart` that borrows `content`.
    ///
    /// `name_len` is the number of leading bytes of `content` that make up
    /// the element name.
    #[inline]
    pub fn borrowed(content: &'a [u8], name_len: usize) -> BytesStart<'a> {
        let buf = Cow::Borrowed(content);
        BytesStart {
            buf: buf,
            name_len: name_len,
        }
    }

    /// Creates a `BytesStart` that owns `content`.
    ///
    /// `name_len` is the number of leading bytes of `content` that make up
    /// the element name.
    #[inline]
    pub fn owned(content: Vec<u8>, name_len: usize) -> BytesStart<'static> {
        let buf = Cow::Owned(content);
        BytesStart {
            buf: buf,
            name_len: name_len,
        }
    }

    /// Converts this value into one that owns its buffer, copying the bytes
    /// if they were borrowed.
    pub fn into_owned(self) -> BytesStart<'static> {
        let BytesStart { buf, name_len } = self;
        BytesStart {
            buf: Cow::Owned(buf.into_owned()),
            name_len: name_len,
        }
    }

    /// Consumes `self`, appends every attribute yielded by `attributes`
    /// and returns the extended value.
    pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
        where I: IntoIterator,
              I::Item: Into<Attribute<'b>>
    {
        for attribute in attributes {
            self.push_attribute(attribute);
        }
        self
    }

    /// Returns the element name: the first `name_len` bytes of the buffer.
    pub fn name(&self) -> &[u8] {
        let content: &[u8] = &self.buf;
        &content[..self.name_len]
    }

    /// Returns the part of the name that follows the first `:`.
    ///
    /// When the name contains no `:` the whole name is returned.
    #[inline]
    pub fn local_name(&self) -> &[u8] {
        let full = self.name();
        match full.iter().position(|&byte| byte == b':') {
            Some(colon) => &full[colon + 1..],
            None => full,
        }
    }

    /// Runs `unescape` over the whole buffer (name included) and returns
    /// its result.
    pub fn unescaped(&self) -> Result<Cow<[u8]>> {
        unescape(&self.buf[..])
    }

    /// Returns an `Attributes` value built over this tag's bytes, starting
    /// right after the element name.
    pub fn attributes(&self) -> Attributes {
        Attributes::new(self, self.name_len)
    }

    /// Appends every attribute yielded by `attributes` and returns `self`
    /// so that calls can be chained.
    pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
        where I: IntoIterator,
              I::Item: Into<Attribute<'b>>
    {
        for attribute in attributes {
            self.push_attribute(attribute);
        }
        self
    }

    /// Unescapes the buffer, then decodes the result with `reader` into an
    /// owned `String`.
    ///
    /// Any error reported by `unescaped` is returned unchanged.
    pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> {
        match self.unescaped() {
            Ok(bytes) => Ok(reader.decode(&*bytes).into_owned()),
            Err(err) => Err(err),
        }
    }

    /// Appends ` key="value"` to the buffer.
    ///
    /// The key and value bytes are copied verbatim; no escaping is applied
    /// here. A borrowed buffer is turned into an owned one first.
    pub fn push_attribute<'b, A: Into<Attribute<'b>>>(&mut self, attr: A) {
        let attribute = attr.into();
        let buffer = self.buf.to_mut();
        buffer.push(b' ');
        buffer.extend_from_slice(attribute.key);
        buffer.extend_from_slice(b"=\"");
        buffer.extend_from_slice(attribute.value);
        buffer.push(b'"');
    }
}
/// XML declaration data, carried by `Event::Decl`.
///
/// The declaration is kept in the form of a `BytesStart`; its `version`,
/// `encoding` and `standalone` values are read from that element's
/// attributes.
#[derive(Debug, Clone)]
pub struct BytesDecl<'a> {
    /// The declaration content, stored like an element tag.
    element: BytesStart<'a>,
}
impl<'a> BytesDecl<'a> {
    /// Wraps an already-built `BytesStart` as an XML declaration.
    pub fn from_start(start: BytesStart<'a>) -> BytesDecl<'a> {
        BytesDecl { element: start }
    }

    /// Returns the value of the `version` attribute.
    ///
    /// Only the first attribute of the declaration is examined.
    ///
    /// # Errors
    ///
    /// Fails when the first attribute cannot be read, when its key is not
    /// `version`, or when the declaration has no attribute at all.
    pub fn version(&self) -> Result<&[u8]> {
        match self.element.attributes().next() {
            Some(Err(e)) => Err(e),
            Some(Ok(Attribute {
                key: b"version",
                value: v,
            })) => Ok(v),
            Some(Ok(a)) => {
                Err(format!("XmlDecl must start with 'version' attribute, found {:?}",
                            from_utf8(a.key))
                    .into())
            }
            None => Err("XmlDecl must start with 'version' attribute, found none".into()),
        }
    }

    /// Returns the value of the first `encoding` attribute, if any.
    ///
    /// Attributes are scanned in order; an attribute error met before an
    /// `encoding` key is found is returned as `Some(Err(..))`. `None` means
    /// the scan finished without finding the key.
    pub fn encoding(&self) -> Option<Result<&[u8]>> {
        for a in self.element.attributes() {
            match a {
                Err(e) => return Some(Err(e)),
                Ok(Attribute {
                    key: b"encoding",
                    value: v,
                }) => return Some(Ok(v)),
                _ => (),
            }
        }
        None
    }

    /// Returns the value of the first `standalone` attribute, if any.
    ///
    /// Attributes are scanned in order; an attribute error met before a
    /// `standalone` key is found is returned as `Some(Err(..))`. `None`
    /// means the scan finished without finding the key.
    pub fn standalone(&self) -> Option<Result<&[u8]>> {
        for a in self.element.attributes() {
            match a {
                Err(e) => return Some(Err(e)),
                Ok(Attribute {
                    key: b"standalone",
                    value: v,
                }) => return Some(Ok(v)),
                _ => (),
            }
        }
        None
    }

    /// Builds a declaration of the form
    /// `xml version="…" encoding="…" standalone="…"`.
    ///
    /// The `encoding` and `standalone` attributes are only written when the
    /// corresponding argument is `Some`. All values are copied verbatim.
    /// The resulting element name is `xml` (3 bytes).
    pub fn new(version: &[u8],
               encoding: Option<&[u8]>,
               standalone: Option<&[u8]>)
               -> BytesDecl<'static> {
        // Each optional prefix starts with the quote that closes the
        // previous attribute value.
        let version_prefix: &[u8] = b"xml version=\"";
        let encoding_prefix: &[u8] = b"\" encoding=\"";
        let standalone_prefix: &[u8] = b"\" standalone=\"";

        let encoding_attr_len = encoding.map_or(0, |xs| encoding_prefix.len() + xs.len());
        let standalone_attr_len = standalone.map_or(0, |xs| standalone_prefix.len() + xs.len());
        // The version value must be counted as well, otherwise the buffer
        // is always too small and has to grow while it is being filled.
        // The trailing `+ 1` is the final closing quote.
        let capacity = version_prefix.len() + version.len() + encoding_attr_len +
                       standalone_attr_len + 1;

        let mut buf = Vec::with_capacity(capacity);
        buf.extend_from_slice(version_prefix);
        buf.extend_from_slice(version);
        if let Some(encoding_val) = encoding {
            buf.extend_from_slice(encoding_prefix);
            buf.extend_from_slice(encoding_val);
        }
        if let Some(standalone_val) = standalone {
            buf.extend_from_slice(standalone_prefix);
            buf.extend_from_slice(standalone_val);
        }
        buf.push(b'"');
        BytesDecl { element: BytesStart::owned(buf, 3) }
    }

    /// Looks up the `Encoding` matching the declared `encoding` label.
    ///
    /// Returns `None` when there is no `encoding` attribute, when reading
    /// it failed, or when `Encoding::for_label` returns `None` for it.
    pub fn encoder(&self) -> Option<&'static Encoding> {
        self.encoding()
            .and_then(|e| e.ok())
            .and_then(|e| Encoding::for_label(e))
    }
}
/// Data of a closing tag: just the element name.
///
/// Carried by `Event::End`.
#[derive(Debug, Clone)]
pub struct BytesEnd<'a> {
    /// Name of the element being closed.
    name: Cow<'a, [u8]>,
}
impl<'a> BytesEnd<'a> {
    /// Creates a `BytesEnd` that borrows `name`.
    #[inline]
    pub fn borrowed(name: &'a [u8]) -> BytesEnd<'a> {
        let name = Cow::Borrowed(name);
        BytesEnd { name: name }
    }

    /// Creates a `BytesEnd` that owns `name`.
    #[inline]
    pub fn owned(name: Vec<u8>) -> BytesEnd<'static> {
        let name = Cow::Owned(name);
        BytesEnd { name: name }
    }

    /// Returns the complete element name.
    #[inline]
    pub fn name(&self) -> &[u8] {
        &self.name[..]
    }

    /// Returns the part of the name that follows the first `:`.
    ///
    /// When the name contains no `:` the whole name is returned.
    #[inline]
    pub fn local_name(&self) -> &[u8] {
        let full = self.name();
        match full.iter().position(|&byte| byte == b':') {
            Some(colon) => &full[colon + 1..],
            None => full,
        }
    }
}
/// Byte content of an event.
///
/// Carried by the `Text`, `Comment`, `CData`, `PI` and `DocType` variants
/// of `Event`.
#[derive(Debug, Clone)]
pub struct BytesText<'a> {
    /// The content bytes, stored without any unescaping applied.
    content: Cow<'a, [u8]>,
}
impl<'a> BytesText<'a> {
    /// Creates a `BytesText` that borrows `content`.
    #[inline]
    pub fn borrowed(content: &'a [u8]) -> BytesText<'a> {
        let content = Cow::Borrowed(content);
        BytesText { content: content }
    }

    /// Creates a `BytesText` that owns `content`.
    #[inline]
    pub fn owned(content: Vec<u8>) -> BytesText<'static> {
        let content = Cow::Owned(content);
        BytesText { content: content }
    }

    /// Runs `unescape` over the content and returns its result.
    pub fn unescaped(&self) -> Result<Cow<[u8]>> {
        unescape(&self.content)
    }

    /// Unescapes the content, then decodes the result with `reader` into an
    /// owned `String`.
    ///
    /// Any error reported by `unescaped` is returned unchanged.
    pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> {
        match self.unescaped() {
            Ok(bytes) => Ok(reader.decode(&*bytes).into_owned()),
            Err(err) => Err(err),
        }
    }
}
/// An event together with the bytes it carries.
///
/// Every variant except `Eof` wraps one of the `Bytes*` types of this
/// module.
#[derive(Debug, Clone)]
pub enum Event<'a> {
    /// An opening tag, e.g. `<foo:bus attr='bar'>`.
    Start(BytesStart<'a>),
    /// A closing tag, e.g. `</foo:bus>`.
    End(BytesEnd<'a>),
    /// An element carrying the same kind of data as `Start`
    /// (by its name, presumably a tag with no content).
    Empty(BytesStart<'a>),
    /// Text data.
    Text(BytesText<'a>),
    /// Comment data.
    Comment(BytesText<'a>),
    /// CData data.
    CData(BytesText<'a>),
    /// XML declaration data.
    Decl(BytesDecl<'a>),
    /// Processing instruction data.
    PI(BytesText<'a>),
    /// Doctype data.
    DocType(BytesText<'a>),
    /// Marks the end of the input; carries no data.
    Eof,
}
/// Lets a `BytesStart` be used as a byte slice covering its whole buffer
/// (the element name and everything stored after it).
impl<'a> Deref for BytesStart<'a> {
    type Target = [u8];

    fn deref(&self) -> &[u8] {
        &self.buf[..]
    }
}
/// Lets a `BytesDecl` be used as a byte slice; the bytes are those of the
/// wrapped element.
impl<'a> Deref for BytesDecl<'a> {
    type Target = [u8];

    fn deref(&self) -> &[u8] {
        self.element.deref()
    }
}
/// Lets a `BytesEnd` be used as a byte slice holding the element name.
impl<'a> Deref for BytesEnd<'a> {
    type Target = [u8];

    fn deref(&self) -> &[u8] {
        &self.name[..]
    }
}
/// Lets a `BytesText` be used as a byte slice holding its content.
impl<'a> Deref for BytesText<'a> {
    type Target = [u8];

    fn deref(&self) -> &[u8] {
        &self.content[..]
    }
}
/// Lets an `Event` be used as a byte slice: the bytes of its payload, or
/// an empty slice for `Eof`.
impl<'a> Deref for Event<'a> {
    type Target = [u8];

    fn deref(&self) -> &[u8] {
        // Variants are grouped by payload type; each payload derefs to `[u8]`.
        match *self {
            Event::Start(ref e) |
            Event::Empty(ref e) => &**e,
            Event::End(ref e) => &**e,
            Event::Decl(ref e) => &**e,
            Event::Text(ref e) |
            Event::Comment(ref e) |
            Event::CData(ref e) |
            Event::PI(ref e) |
            Event::DocType(ref e) => &**e,
            Event::Eof => &[],
        }
    }
}
/// Checks `local_name()` on start and end tags: everything after the first
/// `:` of the name is returned, including the empty-prefix and
/// empty-local-part cases.
#[cfg(test)]
#[test]
fn local_name() {
    use std::str::from_utf8;

    let xml = r#"
<foo:bus attr='bar'>foobusbar</foo:bus>
<foo: attr='bar'>foobusbar</foo:>
<:foo attr='bar'>foobusbar</:foo>
<foo:bus:baz attr='bar'>foobusbar</foo:bus:baz>
"#;
    // Each element contributes two entries: one for its start tag and one
    // for its end tag.
    let expected = ["bus", "bus", "", "", "foo", "foo", "bus:baz", "bus:baz"];

    let mut rdr = Reader::from_str(xml);
    let mut buf = Vec::new();
    let mut parsed_local_names: Vec<String> = Vec::new();
    loop {
        // Copy the name out so that the event's borrow of `buf` ends here.
        let local = match rdr.read_event(&mut buf)
            .expect("unable to read xml event") {
            Event::Start(ref e) => e.local_name().to_vec(),
            Event::End(ref e) => e.local_name().to_vec(),
            Event::Eof => break,
            _ => continue,
        };
        parsed_local_names.push(from_utf8(&local)
            .expect("unable to build str from local_name")
            .to_string());
    }

    for (index, name) in expected.iter().enumerate() {
        assert_eq!(parsed_local_names[index], name.to_string());
    }
}