[go: up one dir, main page]

xml/reader/
parser.rs

1//! Contains an implementation of pull-based XML parser.
2
3use crate::reader::DoctypeRef;
4use crate::common::{is_xml10_char, is_xml11_char, is_xml11_char_not_restricted, is_name_char, is_name_start_char, is_whitespace_char};
5use crate::common::{Position, TextPosition, XmlVersion};
6use crate::name::OwnedName;
7use crate::namespace::NamespaceStack;
8use crate::reader::config::ParserConfig;
9use crate::reader::error::{ImmutableEntitiesError, SyntaxError};
10use crate::reader::error::Error;
11use crate::reader::events::XmlEvent;
12use crate::reader::indexset::AttributesSet;
13use crate::reader::lexer::{Lexer, Token};
14
15use std::collections::HashMap;
16use std::io::Read;
17
/// Generates "take" accessors on `MarkupData`.
///
/// Every `field -> method, Type, default` entry (entries are separated by `;`)
/// becomes a method that moves the current value out of `self.field` and
/// leaves `default` behind in its place.
macro_rules! gen_takes {
    ($($field:ident -> $method:ident, $t:ty, $def:expr);+) => {
        impl MarkupData {
            $(
                #[inline]
                #[allow(clippy::mem_replace_option_with_none)]
                #[allow(clippy::mem_replace_with_default)]
                fn $method(&mut self) -> $t {
                    let fresh: $t = $def;
                    std::mem::replace(&mut self.$field, fresh)
                }
            )+
        }
    };
}
32
// `take_*` accessors for `MarkupData`: each one moves the field's value out
// and leaves the default listed in the last column behind.
gen_takes!(
    name         -> take_name, String, String::new();
    ref_data     -> take_ref_data, String, String::new();

    encoding     -> take_encoding, Option<String>, None;

    element_name -> take_element_name, Option<OwnedName>, None;

    attr_name    -> take_attr_name, Option<OwnedName>, None;
    attributes   -> take_attributes, AttributesSet, AttributesSet::new()
);
44
45mod inside_cdata;
46mod inside_closing_tag_name;
47mod inside_comment;
48mod inside_declaration;
49mod inside_doctype;
50mod inside_opening_tag;
51mod inside_processing_instruction;
52mod inside_reference;
53mod outside_tag;
54
/// XML version reported by the `StartDocument` event that `set_encountered` synthesizes.
static DEFAULT_VERSION: XmlVersion = XmlVersion::Version10;
/// `standalone` value reported by the `StartDocument` event that `set_encountered` synthesizes.
static DEFAULT_STANDALONE: Option<bool> = None;

/// Stack of element names; `depth()` reports its length.
type ElementStack = Vec<OwnedName>;

/// Result alias whose success type is always `XmlEvent`. If you import this,
/// use `std::result::Result` for other results.
pub type Result = super::Result<XmlEvent>;
62
/// Pull-based XML parser.
///
/// The parser does not own its input: every call to `next` receives the reader
/// and returns one event.
pub(crate) struct PullParser {
    /// Parser settings (length limits, `ignore_end_of_stream`, extra entities, ...).
    config: ParserConfig,
    /// Token source; also supplies the current text position and the encoding name.
    lexer: Lexer,
    /// Current state; `dispatch_token` picks the token handler from it.
    st: State,
    /// State saved just before switching to `State::InsideReference`
    /// (presumably restored by the reference handler, which is not in this file).
    state_after_reference: State,
    /// Scratch buffer for the name or value currently being accumulated.
    buf: String,

    /// From DTD internal subset
    entities: HashMap<String, String>,

    /// Namespace scopes used to resolve element and attribute prefixes.
    nst: NamespaceStack,

    /// Partially assembled markup (names, attributes, declaration and doctype parts).
    data: MarkupData,
    /// Terminal result; once set, `next` returns a clone of it on every call.
    final_result: Option<Result>,
    /// Event queued for the following `next` call (e.g. the `EndElement` of an empty tag).
    next_event: Option<Result>,
    /// Names of the elements pushed by `emit_start_element` and popped by `emit_end_element`.
    est: ElementStack,
    /// Queue of text positions; `position()` reports the front entry.
    pos: Vec<TextPosition>,

    /// Furthest document part seen so far, see `Encountered`.
    encountered: Encountered,
    /// Starts as `true`; maintained by state handlers that are not in this file.
    inside_whitespace: bool,
    /// Set once a `:` has been accepted in the qualified name being read.
    seen_prefix_separator: bool,
    /// When set, `next` pops one namespace scope before reading further tokens.
    pop_namespace: bool,
}
87
// Keeps track when XML declaration can happen.
//
// The variant order matters: the derived `Ord` is what `set_encountered` and
// `handle_eof` compare with, and `set_encountered` never moves the value backwards.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
enum Encountered {
    // Initial value set by the constructor.
    None = 0,
    AnyChars, // whitespace before <?xml is not allowed
    Declaration,
    Comment,
    Doctype,
    // Highest value; `handle_eof` reports `NoRootElement` for anything below it.
    Element,
}
98
99impl PullParser {
100    /// Returns a new parser using the given config.
101    #[inline]
102    pub fn new(config: impl Into<ParserConfig>) -> Self {
103        Self::new_with_config(config.into())
104    }
105
106    #[inline]
107    fn new_with_config(config: ParserConfig) -> Self {
108        let mut lexer = Lexer::new(&config);
109        if let Some(enc) = config.override_encoding {
110            lexer.set_encoding(enc);
111        }
112
113        let mut pos = Vec::with_capacity(16);
114        pos.push(TextPosition::new());
115
116        Self {
117            config,
118            lexer,
119            st: State::DocumentStart,
120            state_after_reference: State::OutsideTag,
121            buf: String::new(),
122            entities: HashMap::new(),
123            nst: NamespaceStack::default(),
124
125            data: MarkupData {
126                name: String::new(),
127                doctype: None,
128                doctype_name: None,
129                doctype_public_id: None,
130                doctype_system_id: None,
131                version: None,
132                encoding: None,
133                standalone: None,
134                ref_data: String::new(),
135                element_name: None,
136                quote: None,
137                attr_name: None,
138                attributes: AttributesSet::new(),
139            },
140            final_result: None,
141            next_event: None,
142            est: Vec::new(),
143            pos,
144
145            encountered: Encountered::None,
146            inside_whitespace: true,
147            seen_prefix_separator: false,
148            pop_namespace: false,
149        }
150    }
151
152    /// Checks if this parser ignores the end of stream errors.
153    pub fn is_ignoring_end_of_stream(&self) -> bool { self.config.ignore_end_of_stream }
154
155    /// Retrieves the Doctype from the document if any
156    #[inline]
157    #[deprecated(note = "there is `XmlEvent::Doctype` now")]
158    pub fn doctype(&self) -> Option<&str> {
159        self.data.doctype.as_deref()
160    }
161
162    pub fn doctype_ids(&self) -> Option<DoctypeRef<'_>> {
163        Some(DoctypeRef {
164            syntax: self.data.doctype.as_deref()?,
165            name: self.data.doctype_name.as_deref()?,
166            public_id: self.data.doctype_public_id.as_deref(),
167            system_id: self.data.doctype_system_id.as_deref(),
168        })
169    }
170
171    #[inline(never)]
172    fn set_encountered(&mut self, new_encounter: Encountered) -> Option<Result> {
173        if new_encounter <= self.encountered {
174            return None;
175        }
176        let prev_enc = self.encountered;
177        self.encountered = new_encounter;
178
179        // If declaration was not parsed and we have encountered an element,
180        // emit this declaration as the next event.
181        if prev_enc == Encountered::None {
182            self.push_pos();
183            Some(Ok(XmlEvent::StartDocument {
184                version: DEFAULT_VERSION,
185                encoding: self.lexer.encoding().to_string(),
186                standalone: DEFAULT_STANDALONE,
187            }))
188        } else {
189            None
190        }
191    }
192
193    #[inline]
194    pub fn add_entities<S: Into<String>, T: Into<String>>(&mut self, entities: impl IntoIterator<Item=(S, T)>) -> std::result::Result<(), ImmutableEntitiesError> {
195        if self.data.standalone == Some(true) {
196            return Err(ImmutableEntitiesError::StandaloneDocument);
197        }
198        if self.encountered == Encountered::Element {
199            return Err(ImmutableEntitiesError::ElementEncountered);
200        }
201        self.config.extra_entities.extend(entities.into_iter().map(|(k, v)| (k.into(), v.into())));
202        Ok(())
203    }
204}
205
206impl Position for PullParser {
207    /// Returns the position of the last event produced by the parser
208    #[inline]
209    fn position(&self) -> TextPosition {
210        self.pos.first().copied().unwrap_or_else(TextPosition::new)
211    }
212}
213
/// Top-level parser state; `dispatch_token` selects the token handler from it.
///
/// A unit test in this file asserts that the whole enum occupies 2 bytes.
#[derive(Copy, Clone, PartialEq, Debug)]
pub(crate) enum State {
    /// Handled by `outside_tag`; also the state entered after an element event is emitted.
    OutsideTag,
    /// Handled by `inside_opening_tag`.
    InsideOpeningTag(OpeningTagSubstate),
    /// Handled by `inside_closing_tag_name`.
    InsideClosingTag(ClosingTagSubstate),
    /// Handled by `inside_processing_instruction`.
    InsideProcessingInstruction(ProcessingInstructionSubstate),
    /// Handled by `inside_comment`.
    InsideComment,
    /// Handled by `inside_cdata`.
    InsideCData,
    /// Handled by `inside_declaration`.
    InsideDeclaration(DeclarationSubstate),
    /// Handled by `inside_doctype`.
    InsideDoctype(DoctypeSubstate),
    /// Handled by `inside_reference`.
    InsideReference,
    /// Initial state of a new parser; handled by `document_start`.
    DocumentStart,
}
227
/// Sub-states carried by `State::InsideDoctype`.
///
/// A unit test in this file asserts that this enum occupies 1 byte.
#[derive(Copy, Clone, PartialEq, Debug)]
pub(crate) enum DoctypeSubstate {
    BeforeDoctypeName,
    DoctypeName,
    Outside,
    // PUBLIC ... SYSTEM... public and system literal parts.
    ExternalIdKeyword,
    BeforeSystemLiteral,
    SystemLiteral,
    BeforePubId,
    PubId,
    // Internal Subset related bits, parts inside [...].
    InternalSubset,
    String,
    InsideName,
    BeforeEntityName,
    EntityName,
    BeforeEntityValue,
    EntityValue,
    NumericReferenceStart,
    NumericReference,
    /// expansion
    PEReferenceInValue,
    PEReferenceInDtd,
    /// name definition
    PEReferenceDefinitionStart,
    PEReferenceDefinition,
    IgnorePI,
    SkipDeclaration,
    Comment,
}
259
/// Sub-states carried by `State::InsideOpeningTag`.
#[derive(Copy, Clone, PartialEq, Debug)]
pub(crate) enum OpeningTagSubstate {
    InsideName,

    InsideTag,

    InsideAttributeName,
    AfterAttributeName,

    InsideAttributeValue,
    AfterAttributeValue,
}
272
/// Sub-states carried by `State::InsideClosingTag`.
#[derive(Copy, Clone, PartialEq, Debug)]
pub(crate) enum ClosingTagSubstate {
    CTInsideName,
    CTAfterName,
}
278
/// Sub-states carried by `State::InsideProcessingInstruction`.
#[derive(Copy, Clone, PartialEq, Debug)]
pub(crate) enum ProcessingInstructionSubstate {
    PIInsideName,
    PIInsideData,
}
284
/// Sub-states carried by `State::InsideDeclaration`, grouped by the three
/// declaration attributes: version, encoding and standalone.
#[derive(Copy, Clone, PartialEq, Debug)]
pub(crate) enum DeclarationSubstate {
    BeforeVersion,
    InsideVersion,
    AfterVersion,

    InsideVersionValue,
    AfterVersionValue,

    BeforeEncoding,
    InsideEncoding,
    AfterEncoding,

    InsideEncodingValue,
    AfterEncodingValue,

    BeforeStandaloneDecl,
    InsideStandaloneDecl,
    AfterStandaloneDecl,

    InsideStandaloneDeclValue,
    AfterStandaloneDeclValue,
}
308
/// Kind of name `read_qualified_name` is reading; it decides which tokens may end the name.
#[derive(Copy, Clone, PartialEq, Debug)]
enum QualifiedNameTarget {
    /// The name may be ended by `Token::EqualsSign`.
    Attribute,
    /// The name may be ended by `Token::EmptyTagEnd` or `Token::TagEnd`.
    OpeningTag,
    /// The name may be ended by `Token::TagEnd`.
    ClosingTag,
}
315
/// The kind of quote that opened the attribute value currently being read.
#[derive(Copy, Clone, PartialEq, Eq)]
enum QuoteToken {
    /// Corresponds to `Token::SingleQuote`.
    SingleQuoteToken,
    /// Corresponds to `Token::DoubleQuote`.
    DoubleQuoteToken,
}
321
322impl QuoteToken {
323    #[inline]
324    fn from_token(t: Token) -> Option<Self> {
325        match t {
326            Token::SingleQuote => Some(Self::SingleQuoteToken),
327            Token::DoubleQuote => Some(Self::DoubleQuoteToken),
328            _ => {
329                debug_assert!(false);
330                None
331            },
332        }
333    }
334
335    const fn as_token(self) -> Token {
336        match self {
337            Self::SingleQuoteToken => Token::SingleQuote,
338            Self::DoubleQuoteToken => Token::DoubleQuote,
339        }
340    }
341}
342
/// Scratch storage for markup that is still being assembled.
///
/// Several fields have `take_*` accessors generated by `gen_takes!`.
struct MarkupData {
    name: String,     // used for processing instruction name
    ref_data: String,  // used for reference content

    doctype: Option<String>, // keeps a copy of the original doctype
    doctype_name: Option<Box<str>>, // exposed through `doctype_ids()` as `name`
    doctype_public_id: Option<Box<str>>, // exposed through `doctype_ids()` as `public_id`
    doctype_system_id: Option<Box<str>>, // exposed through `doctype_ids()` as `system_id`
    version: Option<XmlVersion>,  // used for XML declaration version
    encoding: Option<String>,  // used for XML declaration encoding
    standalone: Option<bool>,  // used for XML declaration standalone parameter

    element_name: Option<OwnedName>,  // used for element name

    quote: Option<QuoteToken>,  // used to hold opening quote for attribute value
    attr_name: Option<OwnedName>,  // used to hold attribute name
    attributes: AttributesSet,   // used to hold all accumulated attributes
}
361
362impl PullParser {
363    /// Returns next event read from the given buffer.
364    ///
365    /// This method should be always called with the same buffer. If you call it
366    /// providing different buffers each time, the result will be undefined.
367    pub fn next<R: Read>(&mut self, r: &mut R) -> Result {
368        if let Some(ref ev) = self.final_result {
369            return ev.clone();
370        }
371
372        if let Some(ev) = self.next_event.take() {
373            return ev;
374        }
375
376        if self.pop_namespace {
377            self.pop_namespace = false;
378            self.nst.pop();
379        }
380
381        loop {
382            debug_assert!(self.next_event.is_none());
383            debug_assert!(!self.pop_namespace);
384
385            // While lexer gives us Ok(maybe_token) -- we loop.
386            // Upon having a complete XML-event -- we return from the whole function.
387            match self.lexer.next_token(r) {
388                Ok(Token::Eof) => {
389                    // Forward pos to the lexer head
390                    self.next_pos();
391                    return self.handle_eof();
392                },
393                Ok(token) => match self.dispatch_token(token) {
394                    None => continue,
395                    Some(Ok(xml_event)) => {
396                        self.next_pos();
397                        return Ok(xml_event);
398                    },
399                    Some(Err(xml_error)) => {
400                        self.next_pos();
401                        return self.set_final_result(Err(xml_error));
402                    },
403                },
404                Err(lexer_error) => {
405                    self.next_pos();
406                    return self.set_final_result(Err(lexer_error));
407                },
408            }
409        }
410    }
411
412    /// Handle end of stream
413    #[cold]
414    fn handle_eof(&mut self) -> std::result::Result<XmlEvent, super::Error> {
415        let ev = if self.depth() == 0 {
416            if self.encountered == Encountered::Element && self.st == State::OutsideTag {  // all is ok
417                Ok(XmlEvent::EndDocument)
418            } else if self.encountered < Encountered::Element {
419                self.error(SyntaxError::NoRootElement)
420            } else {  // self.st != State::OutsideTag
421                self.error(SyntaxError::UnexpectedEof)  // TODO: add expected hint?
422            }
423        } else if self.config.ignore_end_of_stream {
424            self.final_result = None;
425            self.lexer.reset_eof_handled();
426            return self.error(SyntaxError::UnbalancedRootElement);
427        } else {
428            self.error(SyntaxError::UnbalancedRootElement)
429        };
430        self.set_final_result(ev)
431    }
432
433    // This function is to be called when a terminal event is reached.
434    // The function sets up the `self.final_result` into `Some(result)` and return `result`.
435    #[inline]
436    fn set_final_result(&mut self, result: Result) -> Result {
437        self.final_result = Some(result.clone());
438        result
439    }
440
441    #[cold]
442    #[allow(clippy::needless_pass_by_value)]
443    fn error(&self, e: SyntaxError) -> Result {
444        Err(Error::syntax(e.to_cow(), self.lexer.position()))
445    }
446
447    #[inline]
448    fn next_pos(&mut self) {
449        // unfortunately calls to next_pos will never be perfectly balanced with push_pos,
450        // at very least because parse errors and EOF can happen unexpectedly without a prior push.
451        if !self.pos.is_empty() {
452            if self.pos.len() > 1 {
453                self.pos.remove(0);
454            } else {
455                self.pos[0] = self.lexer.position();
456            }
457        }
458    }
459
460    #[inline]
461    #[track_caller]
462    fn push_pos(&mut self) {
463        debug_assert!(self.pos.len() != self.pos.capacity(), "You've found a bug in xml-rs, caused by calls to push_pos() in states that don't end up emitting events.
464            This case is ignored in release mode, and merely causes document positions to be out of sync.
465            Please file a bug and include the XML document that triggers this assert.");
466
467        // it has capacity preallocated for more than it ever needs, so this reduces code size
468        if self.pos.len() != self.pos.capacity() {
469            self.pos.push(self.lexer.position());
470        } else if self.pos.len() > 1 {
471            self.pos.remove(0); // this mitigates the excessive push_pos() call
472        }
473    }
474
475    #[inline(never)]
476    fn dispatch_token(&mut self, t: Token) -> Option<Result> {
477        match self.st {
478            State::OutsideTag                     => self.outside_tag(t),
479            State::InsideOpeningTag(s)            => self.inside_opening_tag(t, s),
480            State::InsideClosingTag(s)            => self.inside_closing_tag_name(t, s),
481            State::InsideReference                => self.inside_reference(t),
482            State::InsideComment                  => self.inside_comment(t),
483            State::InsideCData                    => self.inside_cdata(t),
484            State::InsideProcessingInstruction(s) => self.inside_processing_instruction(t, s),
485            State::InsideDoctype(s)               => self.inside_doctype(t, s),
486            State::InsideDeclaration(s)           => self.inside_declaration(t, s),
487            State::DocumentStart                  => self.document_start(t),
488        }
489    }
490
491    #[inline]
492    fn depth(&self) -> usize {
493        self.est.len()
494    }
495
496    #[inline]
497    fn buf_has_data(&self) -> bool {
498        !self.buf.is_empty()
499    }
500
501    #[inline]
502    fn take_buf(&mut self) -> String {
503        std::mem::take(&mut self.buf)
504    }
505
506    #[inline]
507    fn take_buf_boxed(&mut self) -> Box<str> {
508        let res = self.buf.as_str().into();
509        self.buf.clear();
510        res
511    }
512
513    #[inline]
514    #[allow(clippy::wrong_self_convention)]
515    fn into_state(&mut self, st: State, ev: Option<Result>) -> Option<Result> {
516        self.st = st;
517        ev
518    }
519
520    #[inline]
521    #[allow(clippy::wrong_self_convention)]
522    fn into_state_continue(&mut self, st: State) -> Option<Result> {
523        self.into_state(st, None)
524    }
525
526    #[inline]
527    #[allow(clippy::wrong_self_convention)]
528    fn into_state_emit(&mut self, st: State, ev: Result) -> Option<Result> {
529        self.into_state(st, Some(ev))
530    }
531
    /// Dispatches tokens in order to process qualified name. If qualified name cannot be parsed,
    /// an error is returned.
    ///
    /// Name characters are appended to `self.buf` one token at a time (returning `None`).
    /// When a terminating token arrives, the buffer is taken, parsed into an `OwnedName`
    /// and passed to `on_name` together with that terminating token.
    ///
    /// # Parameters
    /// * `t`       --- next token;
    /// * `target`  --- what kind of name is being read; decides which of `=`, `/>` and `>`
    ///   may terminate the name;
    /// * `on_name` --- a callback which is executed with the parsed name when the name is
    ///   terminated, either by whitespace or by a terminating token allowed for `target`.
    fn read_qualified_name<F>(&mut self, t: Token, target: QualifiedNameTarget, on_name: F) -> Option<Result>
      where F: Fn(&mut Self, Token, OwnedName) -> Option<Result> {

        // Takes the buffered text, resets the colon flag and parses the text as a name.
        let try_consume_name = move |this: &mut Self, t| {
            let name = this.take_buf();
            this.seen_prefix_separator = false;
            match name.parse() {
                Ok(name) => on_name(this, t, name),
                Err(()) => Some(this.error(SyntaxError::InvalidQualifiedName(name.into()))),
            }
        };

        match t {
            // There can be only one colon, and not as the first character
            Token::Character(':') if self.buf_has_data() && !self.seen_prefix_separator => {
                self.buf.push(':');
                self.seen_prefix_separator = true;
                None
            },

            // First character must be a name-start character, the rest name characters.
            Token::Character(c) if c != ':' && (self.buf.is_empty() && is_name_start_char(c) ||
                                          self.buf_has_data() && is_name_char(c)) => {
                // The limit is checked against the buffer length before this character is added.
                if self.buf.len() > self.config.max_name_length {
                    return Some(self.error(SyntaxError::ExceededConfiguredLimit));
                }
                self.buf.push(c);
                None
            },

            Token::EqualsSign if target == QualifiedNameTarget::Attribute => try_consume_name(self, t),

            Token::EmptyTagEnd if target == QualifiedNameTarget::OpeningTag => try_consume_name(self, t),

            Token::TagEnd if target == QualifiedNameTarget::OpeningTag ||
                      target == QualifiedNameTarget::ClosingTag => try_consume_name(self, t),

            Token::Character(c) if is_whitespace_char(c) => try_consume_name(self, t),

            // Anything else cannot appear in, or terminate, this kind of name.
            _ => Some(self.error(SyntaxError::UnexpectedQualifiedName(t))),
        }
    }
579
580    /// Dispatches tokens in order to process attribute value.
581    ///
582    /// # Parameters
583    /// * `t`        --- next token;
584    /// * `on_value` --- a callback which is called when terminating quote is encountered.
585    fn read_attribute_value<F>(&mut self, t: Token, on_value: F) -> Option<Result>
586      where F: Fn(&mut Self, String) -> Option<Result> {
587        match t {
588            Token::Character(c) if self.data.quote.is_none() && is_whitespace_char(c) => None, // skip leading whitespace
589
590            Token::DoubleQuote | Token::SingleQuote => match self.data.quote {
591                None => {  // Entered attribute value
592                    self.data.quote = QuoteToken::from_token(t);
593                    None
594                },
595                Some(q) if q.as_token() == t => {
596                    self.data.quote = None;
597                    let value = self.take_buf();
598                    on_value(self, value)
599                },
600                _ => {
601                    if let Token::Character(c) = t {
602                        if !self.is_valid_xml_char_not_restricted(c) {
603                            return Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)));
604                        }
605                    }
606                    if self.buf.len() > self.config.max_attribute_length {
607                        return Some(self.error(SyntaxError::ExceededConfiguredLimit));
608                    }
609                    t.push_to_string(&mut self.buf);
610                    None
611                },
612            },
613
614            Token::ReferenceStart if self.data.quote.is_some() => {
615                self.state_after_reference = self.st;
616                self.into_state_continue(State::InsideReference)
617            },
618
619            Token::OpeningTagStart | Token::ProcessingInstructionStart => {
620                Some(self.error(SyntaxError::UnexpectedOpeningTag))
621            },
622
623            Token::Character(c) if !self.is_valid_xml_char_not_restricted(c) => {
624                Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
625            },
626
627            // Every character except " and ' and < is okay
628            _ if self.data.quote.is_some() => {
629                if self.buf.len() > self.config.max_attribute_length {
630                    return Some(self.error(SyntaxError::ExceededConfiguredLimit));
631                }
632                t.push_to_string(&mut self.buf);
633                None
634            },
635
636            _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
637        }
638    }
639
640    fn emit_start_element(&mut self, emit_end_element: bool) -> Option<Result> {
641        let mut name = self.data.take_element_name()?;
642        let mut attributes = self.data.take_attributes().into_vec();
643
644        // check whether the name prefix is bound and fix its namespace
645        match self.nst.get(name.borrow().prefix_repr()) {
646            Some("") => name.namespace = None, // default namespace
647            Some(ns) => name.namespace = Some(ns.into()),
648            None => return Some(self.error(SyntaxError::UnboundElementPrefix(name.to_string().into()))),
649        }
650
651        // check and fix accumulated attributes prefixes
652        for attr in &mut attributes {
653            if let Some(ref pfx) = attr.name.prefix {
654                let new_ns = match self.nst.get(pfx) {
655                    Some("") => None, // default namespace
656                    Some(ns) => Some(ns.into()),
657                    None => return Some(self.error(SyntaxError::UnboundAttribute(attr.name.to_string().into()))),
658                };
659                attr.name.namespace = new_ns;
660            }
661        }
662
663        if emit_end_element {
664            self.pop_namespace = true;
665            self.next_event = Some(Ok(XmlEvent::EndElement {
666                name: name.clone()
667            }));
668        } else {
669            self.est.push(name.clone());
670        }
671        let namespace = self.nst.squash();
672        self.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartElement {
673            name,
674            attributes,
675            namespace
676        }))
677    }
678
679    fn emit_end_element(&mut self) -> Option<Result> {
680        let mut name = self.data.take_element_name()?;
681
682        // check whether the name prefix is bound and fix its namespace
683        match self.nst.get(name.borrow().prefix_repr()) {
684            Some("") => name.namespace = None, // default namespace
685            Some(ns) => name.namespace = Some(ns.into()),
686            None => return Some(self.error(SyntaxError::UnboundElementPrefix(name.to_string().into()))),
687        }
688
689        let op_name = self.est.pop()?;
690
691        if name == op_name {
692            self.pop_namespace = true;
693            self.into_state_emit(State::OutsideTag, Ok(XmlEvent::EndElement { name }))
694        } else {
695            Some(self.error(SyntaxError::UnexpectedClosingTag(format!("{name} != {op_name}").into())))
696        }
697    }
698
699    #[inline]
700    fn is_valid_xml_char(&self, c: char) -> bool {
701        if Some(XmlVersion::Version11) == self.data.version {
702            is_xml11_char(c)
703        } else {
704            is_xml10_char(c)
705        }
706    }
707
708    #[inline]
709    fn is_valid_xml_char_not_restricted(&self, c: char) -> bool {
710        if Some(XmlVersion::Version11) == self.data.version {
711            is_xml11_char_not_restricted(c)
712        } else {
713            is_xml10_char(c)
714        }
715    }
716}
717
// Unit tests that drive `PullParser::next` directly over in-memory documents.
#[cfg(test)]
mod tests {
    use crate::attribute::OwnedAttribute;
    use crate::common::TextPosition;
    use crate::name::OwnedName;
    use crate::reader::events::XmlEvent;
    use crate::reader::parser::PullParser;
    use crate::reader::ParserConfig;
    use std::io::BufReader;

    // Parser with the default configuration.
    fn new_parser() -> PullParser {
        PullParser::new(ParserConfig::new())
    }

    // Pulls one event from parser `$p` reading `$r` and panics unless it matches
    // pattern `$t` (and, in the second form, also satisfies condition `$c`).
    macro_rules! expect_event(
        ($r:expr, $p:expr, $t:pat) => (
            match $p.next(&mut $r) {
                $t => {}
                e => panic!("Unexpected event: {e:?}\nExpected: {}", stringify!($t))
            }
        );
        ($r:expr, $p:expr, $t:pat => $c:expr ) => (
            match $p.next(&mut $r) {
                $t if $c => {}
                e => panic!("Unexpected event: {e:?}\nExpected: {} if {}", stringify!($t), stringify!($c))
            }
        )
    );

    // Builds a `(reader, parser)` pair over the given document text.
    macro_rules! test_data(
        ($d:expr) => ({
            static DATA: &'static str = $d;
            let r = BufReader::new(DATA.as_bytes());
            let p = new_parser();
            (r, p)
        })
    );

    // A `;` inside an attribute value is ordinary text.
    #[test]
    fn issue_3_semicolon_in_attribute_value() {
        let (mut r, mut p) = test_data!(r#"
            <a attr="zzz;zzz" />
        "#);

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, ref attributes, ref namespace }) =>
            *name == OwnedName::local("a") &&
             attributes.len() == 1 &&
             attributes[0] == OwnedAttribute::new(OwnedName::local("attr"), "zzz;zzz") &&
             namespace.is_essentially_empty()
        );
        expect_event!(r, p, Ok(XmlEvent::EndElement { ref name }) => *name == OwnedName::local("a"));
        expect_event!(r, p, Ok(XmlEvent::EndDocument));
    }

    // A numeric character reference in element content is expanded into `Characters`.
    #[test]
    fn issue_140_entity_reference_inside_tag() {
        let (mut r, mut p) = test_data!(r"
            <bla>&#9835;</bla>
        ");

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, .. }) => *name == OwnedName::local("bla"));
        expect_event!(r, p, Ok(XmlEvent::Characters(ref s)) => s == "\u{266b}");
        expect_event!(r, p, Ok(XmlEvent::EndElement { ref name, .. }) => *name == OwnedName::local("bla"));
        expect_event!(r, p, Ok(XmlEvent::EndDocument));
    }

    // `<!--` inside a comment is allowed, `--->` is not, and markup-like text is kept verbatim.
    #[test]
    fn issue_220_comment() {
        let (mut r, mut p) = test_data!(r"<x><!-- <!--></x>");
        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
        expect_event!(r, p, Ok(XmlEvent::EndElement { .. }));
        expect_event!(r, p, Ok(XmlEvent::EndDocument));

        let (mut r, mut p) = test_data!(r"<x><!-- <!---></x>");
        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
        expect_event!(r, p, Err(_)); // ---> is forbidden in comments

        let (mut r, mut p) = test_data!(r"<x><!--<text&x;> <!--></x>");
        p.config.ignore_comments = false;
        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
        expect_event!(r, p, Ok(XmlEvent::Comment(s)) => s == "<text&x;> <!");
        expect_event!(r, p, Ok(XmlEvent::EndElement { .. }));
        expect_event!(r, p, Ok(XmlEvent::EndDocument));
    }

    // Malformed XML declarations (stray, duplicated or unseparated attributes) are errors.
    #[test]
    fn malformed_declaration_attrs() {
        let (mut r, mut p) = test_data!(r#"<?xml version x="1.0"?>"#);
        expect_event!(r, p, Err(_));

        let (mut r, mut p) = test_data!(r#"<?xml version="1.0" version="1.0"?>"#);
        expect_event!(r, p, Err(_));

        let (mut r, mut p) = test_data!(r#"<?xml version="1.0"encoding="utf-8"?>"#);
        expect_event!(r, p, Err(_));

        let (mut r, mut p) = test_data!(r#"<?xml version="1.0"standalone="yes"?>"#);
        expect_event!(r, p, Err(_));

        let (mut r, mut p) = test_data!(r#"<?xml version="1.0" encoding="utf-8"standalone="yes"?>"#);
        expect_event!(r, p, Err(_));
    }

    // `<` inside an attribute value is rejected; the expected column depends on the
    // indentation inside the raw string above.
    #[test]
    fn opening_tag_in_attribute_value() {
        use crate::reader::error::{SyntaxError, Error};

        let (mut r, mut p) = test_data!(r#"
            <a attr="zzz<zzz" />
        "#);

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Err(ref e) =>
            *e == Error::syntax(SyntaxError::UnexpectedOpeningTag.to_cow(), TextPosition { row: 1, column: 24 }));
    }

    // `<?` inside an attribute value is rejected with the same error as `<`.
    #[test]
    fn processing_instruction_in_attribute_value() {
        use crate::reader::error::{SyntaxError, Error};

        let (mut r, mut p) = test_data!(r#"
            <y F="<?abc"><x G="/">
        "#);

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Err(ref e) =>
            *e == Error::syntax(SyntaxError::UnexpectedOpeningTag.to_cow(),
                TextPosition { row: 1, column: 18 }));
    }

    // A bare `&` directly followed by another reference is an error.
    #[test]
    fn reference_err() {
        let (mut r, mut p) = test_data!(r"
            <a>&&amp;</a>
        ");

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { .. }));
        expect_event!(r, p, Err(_));
    }

    // Guards the in-memory size of the state enums.
    #[test]
    fn state_size() {
        assert_eq!(2, std::mem::size_of::<super::State>());
        assert_eq!(1, std::mem::size_of::<super::DoctypeSubstate>());
    }
}