xml/reader.rs
//! Contains the high-level interface for a pull-based XML parser.
//!
//! The most important type in this module is `EventReader`, which provides an iterator
//! view of the events in an XML document.
5
6use std::io::Read;
7use std::iter::FusedIterator;
8use std::result;
9
10use crate::common::{Position, TextPosition};
11
12pub use self::config::ParserConfig;
13pub use self::error::{Error, ErrorKind};
14pub use self::events::XmlEvent;
15
16// back compat
17#[doc(hidden)]
18#[deprecated(note = "Merged into ParserConfig")]
19pub type ParserConfig2 = ParserConfig;
20
21use self::parser::PullParser;
22
23mod config;
24mod error;
25mod events;
26mod indexset;
27mod lexer;
28mod parser;
29
/// A result type yielded by [`EventReader`].
31pub type Result<T, E = Error> = result::Result<T, E>;
32
33/// A wrapper around an `std::io::Read` instance which provides pull-based XML parsing.
34///
35/// The reader should be wrapped in a `BufReader`, otherwise parsing may be very slow.
36pub struct EventReader<R: Read> {
37 source: R,
38 parser: PullParser,
39}
40
41impl<R: Read> EventReader<R> {
42 /// Creates a new reader, consuming the given stream. The reader should be wrapped in a `BufReader`, otherwise parsing may be very slow.
43 #[inline]
44 pub fn new(source: R) -> Self {
45 Self::new_with_config(source, ParserConfig::new())
46 }
47
48 /// Creates a new reader with the provded configuration, consuming the given stream. The reader should be wrapped in a `BufReader`, otherwise parsing may be very slow.
49 #[inline]
50 pub fn new_with_config(source: R, config: impl Into<ParserConfig>) -> Self {
51 Self {
52 source,
53 parser: PullParser::new(config),
54 }
55 }
56
57 /// Pulls and returns next XML event from the stream.
58 ///
59 /// If this returns [Err] or [`XmlEvent::EndDocument`] then further calls to
60 /// this method will return this event again.
61 #[inline]
62 #[allow(clippy::should_implement_trait)]
63 pub fn next(&mut self) -> Result<XmlEvent> {
64 self.parser.next(&mut self.source)
65 }
66
67 /// Skips all XML events until the next end tag at the current level.
68 ///
69 /// Convenience function that is useful for the case where you have
70 /// encountered a start tag that is of no interest and want to
71 /// skip the entire XML subtree until the corresponding end tag.
72 #[inline]
73 pub fn skip(&mut self) -> Result<()> {
74 let mut depth = 1;
75
76 while depth > 0 {
77 match self.next()? {
78 XmlEvent::StartElement { .. } => depth += 1,
79 XmlEvent::EndElement { .. } => depth -= 1,
80 XmlEvent::EndDocument => return Err(Error {
81 kind: ErrorKind::UnexpectedEof,
82 pos: self.parser.position(),
83 }),
84 _ => {},
85 }
86 }
87
88 Ok(())
89 }
90
91 /// Access underlying reader
92 ///
93 /// Using it directly while the event reader is parsing is not recommended
94 pub fn source(&self) -> &R { &self.source }
95
96 /// Access underlying reader
97 ///
98 /// Using it directly while the event reader is parsing is not recommended
99 pub fn source_mut(&mut self) -> &mut R { &mut self.source }
100
101 /// Unwraps this `EventReader`, returning the underlying reader.
102 ///
103 /// Note that this operation is destructive; unwrapping the reader and wrapping it
104 /// again with `EventReader::new()` will create a fresh reader which will attempt
105 /// to parse an XML document from the beginning.
106 pub fn into_inner(self) -> R {
107 self.source
108 }
109
110 /// Returns the DOCTYPE of the document if it has already been seen
111 ///
112 /// Available only after the root `StartElement` event
113 #[inline]
114 #[deprecated(note = "there is `XmlEvent::Doctype` now")]
115 #[allow(deprecated)]
116 pub fn doctype(&self) -> Option<&str> {
117 self.parser.doctype()
118 }
119}
120
121impl<B: Read> Position for EventReader<B> {
122 /// Returns the position of the last event produced by the reader.
123 #[inline]
124 fn position(&self) -> TextPosition {
125 self.parser.position()
126 }
127}
128
129impl<R: Read> IntoIterator for EventReader<R> {
130 type IntoIter = Events<R>;
131 type Item = Result<XmlEvent>;
132
133 fn into_iter(self) -> Events<R> {
134 Events { reader: self, finished: false }
135 }
136}
137
138/// An iterator over XML events created from some type implementing `Read`.
139///
140/// When the next event is `xml::event::Error` or `xml::event::EndDocument`, then
141/// it will be returned by the iterator once, and then it will stop producing events.
142pub struct Events<R: Read> {
143 reader: EventReader<R>,
144 finished: bool,
145}
146
147impl<R: Read> Events<R> {
148 /// Unwraps the iterator, returning the internal `EventReader`.
149 #[inline]
150 pub fn into_inner(self) -> EventReader<R> {
151 self.reader
152 }
153
154 /// Access the underlying reader
155 ///
156 /// It's not recommended to use it while the events are still being parsed
157 pub fn source(&self) -> &R { &self.reader.source }
158
159 /// Access the underlying reader
160 ///
161 /// It's not recommended to use it while the events are still being parsed
162 pub fn source_mut(&mut self) -> &mut R { &mut self.reader.source }
163}
164
165impl<R: Read> FusedIterator for Events<R> {
166}
167
168impl<R: Read> Iterator for Events<R> {
169 type Item = Result<XmlEvent>;
170
171 #[inline]
172 fn next(&mut self) -> Option<Result<XmlEvent>> {
173 if self.finished && !self.reader.parser.is_ignoring_end_of_stream() {
174 None
175 } else {
176 let ev = self.reader.next();
177 if let Ok(XmlEvent::EndDocument) | Err(_) = ev {
178 self.finished = true;
179 }
180 Some(ev)
181 }
182 }
183}
184
185impl<'r> EventReader<&'r [u8]> {
186 /// A convenience method to create an `XmlReader` from a string slice.
187 #[inline]
188 #[must_use]
189 #[allow(clippy::should_implement_trait)]
190 pub fn from_str(source: &'r str) -> Self {
191 EventReader::new(source.as_bytes())
192 }
193}