[go: up one dir, main page]

xml/
common.rs

1//! Contains common types and functions used throughout the library.
2
3use std::fmt;
4
/// Represents a position inside some textual document.
///
/// Both coordinates are stored zero-based; the `Display` and `Debug`
/// renderings of this type add 1 to each of them.
#[derive(Copy, Clone, PartialEq, Eq)]
pub struct TextPosition {
    /// Zero-based row index; read it through [`TextPosition::row`].
    #[doc(hidden)]
    pub row: u64,

    /// Zero-based column index; read it through [`TextPosition::column`].
    #[doc(hidden)]
    pub column: u64,
}

impl TextPosition {
    /// Creates a new position initialized to the beginning of the document
    /// (row 0, column 0).
    #[inline]
    #[must_use]
    pub const fn new() -> Self {
        Self { row: 0, column: 0 }
    }

    /// Advances the position in a line by `count` columns.
    #[inline]
    pub fn advance(&mut self, count: u8) {
        self.column += u64::from(count);
    }

    /// Moves the column forward to the next multiple of `width`.
    ///
    /// A column that already sits on a multiple of `width` still moves a
    /// full `width` forward. A `width` of zero leaves the position untouched
    /// rather than panicking on a division by zero.
    #[doc(hidden)]
    #[deprecated]
    pub fn advance_to_tab(&mut self, width: u8) {
        let width = u64::from(width);
        // `checked_rem` is `None` only when the divisor is zero.
        if let Some(past_stop) = self.column.checked_rem(width) {
            self.column += width - past_stop;
        }
    }

    /// Advances the position to the beginning of the next line:
    /// the column is reset to 0 and the row is incremented.
    #[inline]
    pub fn new_line(&mut self) {
        self.column = 0;
        self.row += 1;
    }

    /// Row, counting from 0. Add 1 to display as users expect!
    #[inline]
    #[must_use]
    pub const fn row(&self) -> u64 {
        self.row
    }

    /// Column, counting from 0. Add 1 to display as users expect!
    #[inline]
    #[must_use]
    pub const fn column(&self) -> u64 {
        self.column
    }
}
55
56impl fmt::Debug for TextPosition {
57    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
58        fmt::Display::fmt(self, f)
59    }
60}
61
62impl fmt::Display for TextPosition {
63    #[inline]
64    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
65        write!(f, "{}:{}", self.row + 1, self.column + 1)
66    }
67}
68
69/// Get the position in the document corresponding to the object
70///
71/// This trait is implemented by parsers, lexers and errors.
72pub trait Position {
73    /// Returns the current position or a position corresponding to the object.
74    fn position(&self) -> TextPosition;
75}
76
77impl Position for TextPosition {
78    #[inline]
79    fn position(&self) -> TextPosition {
80        *self
81    }
82}
83
/// XML version enumeration.
///
/// Variants are ordered, so `Version10 < Version11`.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum XmlVersion {
    /// XML version 1.0, or any 1.x version other than 1.1
    ///
    /// All future versions are disallowed since XML 1.1, so any version beyond 1.1 is an error tolerated only in XML 1.0.
    /// <https://www.w3.org/TR/REC-xml/#sec-prolog-dtd>
    Version10,

    /// XML version 1.1.
    Version11,
}

impl XmlVersion {
    /// Convenience helper which returns a string representation of the given version.
    ///
    /// The returned slice is a `'static` literal, and the function is `const`,
    /// so it can also be evaluated in constant context.
    ///
    /// ```
    /// # use xml::common::XmlVersion;
    /// assert_eq!(XmlVersion::Version10.as_str(), "1.0");
    /// assert_eq!(XmlVersion::Version11.as_str(), "1.1");
    /// ```
    #[inline]
    #[must_use]
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::Version10 => "1.0",
            Self::Version11 => "1.1",
        }
    }
}
112
113impl fmt::Display for XmlVersion {
114    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
115        self.as_str().fmt(f)
116    }
117}
118
119impl fmt::Debug for XmlVersion {
120    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
121        fmt::Display::fmt(self, f)
122    }
123}
124
/// Tells whether `c` is one of the four XML white space characters (`S`):
/// space, tab, line feed or carriage return, as defined by the XML 1.1
/// specification, [section 2.3][1].
///
/// [1]: http://www.w3.org/TR/2006/REC-xml11-20060816/#sec-common-syn
#[must_use]
#[inline]
pub const fn is_whitespace_char(c: char) -> bool {
    c == ' ' || c == '\t' || c == '\n' || c == '\r'
}
134
/// Matches the `PubidChar` production: space, CR, LF, ASCII letters and
/// digits, and the punctuation ``-'()+,./:=?;!*#@$_%``.
pub(crate) fn is_pubid_char(c: char) -> bool {
    if c.is_ascii_alphanumeric() {
        return true;
    }
    matches!(
        c,
        ' ' | '\r' | '\n'
            | '-' | '\'' | '(' | ')' | '+' | ',' | '.' | '/' | ':' | '=' | '?' | ';'
            | '!' | '*' | '#' | '@' | '$' | '_' | '%'
    )
}
141
142/// Checks whether the given string is compound only by white space
143/// characters (`S`) using the previous `is_whitespace_char` to check
144/// all characters of this string
145pub fn is_whitespace_str(s: &str) -> bool {
146    s.chars().all(is_whitespace_char)
147}
148
/// Is it a valid character in XML 1.0
///
/// Everything is accepted except the C0 controls other than tab, line feed
/// and carriage return, and the two code points U+FFFE and U+FFFF.
#[must_use]
pub const fn is_xml10_char(c: char) -> bool {
    !matches!(
        c,
        '\u{00}'..='\u{08}' | '\u{0B}' | '\u{0C}' | '\u{0E}'..='\u{1F}' | '\u{FFFE}' | '\u{FFFF}'
    )
}
154
/// Is it a valid character in XML 1.1
///
/// Every `char` is accepted except NUL, U+FFFE and U+FFFF.
#[must_use]
pub const fn is_xml11_char(c: char) -> bool {
    !matches!(c, '\u{00}' | '\u{FFFE}' | '\u{FFFF}')
}
160
161/// Is it a valid character in XML 1.1 but not part of the restricted character set
162#[must_use]
163pub const fn is_xml11_char_not_restricted(c: char) -> bool {
164    is_xml11_char(c) &&
165        !matches!(c, '\u{01}'..='\u{08}' | '\u{0B}'..='\u{0C}' | '\u{0E}'..='\u{1F}' | '\u{7F}'..='\u{84}' | '\u{86}'..='\u{9F}')
166}
167
/// Tells whether `c` may begin an XML name (`NameStartChar`),
/// as defined by the XML 1.1 specification, [section 2.3][1].
///
/// [1]: http://www.w3.org/TR/2006/REC-xml11-20060816/#sec-common-syn
#[must_use]
pub const fn is_name_start_char(c: char) -> bool {
    // ASCII: only letters, the colon and the underscore qualify.
    if c.is_ascii() {
        return c.is_ascii_alphabetic() || c == ':' || c == '_';
    }
    // Everything else is decided by code-point range.
    let code_point = c as u32;
    matches!(
        code_point,
        0xC0..=0xD6
            | 0xD8..=0xF6
            | 0xF8..=0x2FF
            | 0x370..=0x37D
            | 0x37F..=0x1FFF
            | 0x200C..=0x200D
            | 0x2070..=0x218F
            | 0x2C00..=0x2FEF
            | 0x3001..=0xD7FF
            | 0xF900..=0xFDCF
            | 0xFDF0..=0xFFFD
            | 0x10000..=0xEFFFF
    )
}
184
185/// Checks whether the given character is a name character (`NameChar`)
186/// as is defined by XML 1.1 specification, [section 2.3][1].
187///
188/// [1]: http://www.w3.org/TR/2006/REC-xml11-20060816/#sec-common-syn
189#[must_use]
190pub const fn is_name_char(c: char) -> bool {
191    if is_name_start_char(c) {
192        return true;
193    }
194    matches!(c,
195        '-' | '.' | '0'..='9' | '\u{B7}' |
196        '\u{300}'..='\u{36F}' | '\u{203F}'..='\u{2040}'
197    )
198}