// XML_Parser.cc - an XML file reader
//
// Copyright (C) 2004 Sam Varner
//
// This file is part of Vamos Automotive Simulator.
//
// Vamos is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Vamos is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Vamos. If not, see <http://www.gnu.org/licenses/>.
#include "XML_Parser.hpp"
#include <algorithm>
#include <cassert>
#include <deque>
#include <fstream>
#include <sstream>
using namespace Vamos_Media;
// Return a copy of 'data_string' with any leading spaces, tabs and newlines
// removed.  Whitespace after the first other character is kept.  The result
// is empty if the input contains nothing else.
std::string remove_leading_space(std::string data_string)
{
    const std::string::size_type first_kept = data_string.find_first_not_of(" \t\n");
    if (first_kept == std::string::npos)
        return std::string();
    return data_string.substr(first_kept);
}
//----------------------------------------------------------------------------------------
// Read the next tag from 'stream', along with the character data that comes
// before it, then classify the tag and extract its label and attributes.
//
// Characters before the '<' are collected in m_data (leading whitespace is
// removed); the tag itself is collected in m_text.  If the stream ends before
// a '<' is seen, m_text stays empty and the type stays NONE.  For comments
// only the type is set.
//
// Throws Unterminated_Tag if a tag was opened but the stream ended before its
// closing '>' was read.
XML_Tag::XML_Tag(std::ifstream &stream)
    : m_type(NONE),
      m_lines(0)
{
    // read_to_tag_start() reports true when it ran out of input, in which
    // case there is no tag whose end needs to be found.
    const bool reached_eof = read_to_tag_start(stream);
    if (!reached_eof)
    {
        const bool tag_closed = read_to_tag_end(stream);
        if (!tag_closed)
            throw Unterminated_Tag(get_lines(), m_text, true);
    }
    m_data = remove_leading_space(m_data);

    // Nothing to classify when no tag text was read.
    if (m_text.empty())
        return;

    m_type = find_tag_type(stream);
    if (m_type != COMMENT)
    {
        Str_Iter text_start;
        Str_Iter text_end;
        get_text_boundries(text_start, text_end);
        m_label = find_label(text_start, text_end);
        find_attributes(text_start, text_end);
    }
}
// Set 'text_start' and 'text_end' to delimit the part of m_text that lies
// inside the tag's punctuation.
//
// One character is always skipped at each end (the '<' and the '>').
// Processing instructions and end tags skip one more at the front;
// processing instructions and empty tags skip one more at the back.
// 'text_end' is left pointing at the first skipped trailing character.
//
// m_type must be PROCESSING_INSTRUCTION, END, EMPTY or START.
void XML_Tag::get_text_boundries(Str_Iter &text_start, Str_Iter &text_end)
{
    const bool extra_leading = (m_type == PROCESSING_INSTRUCTION || m_type == END);
    const bool extra_trailing = (m_type == PROCESSING_INSTRUCTION || m_type == EMPTY);

    text_start = m_text.begin() + (extra_leading ? 2 : 1);
    text_end = m_text.end() - (extra_trailing ? 2 : 1);

    // Any other tag type reaching this point is a programming error.
    assert(m_type == PROCESSING_INSTRUCTION || m_type == END || m_type == EMPTY
           || m_type == START);
}
// Consume characters from 'stream' up to and including the next '<'.
//
// Everything before the '<' is appended to m_data; the '<' itself becomes the
// first character appended to m_text.
//
// Returns false if a '<' was found, true if the stream was exhausted first.
bool XML_Tag::read_to_tag_start(std::ifstream &stream)
{
    for (char next; get_next_char(stream, next);)
    {
        if (next != '<')
        {
            m_data.push_back(next);
            continue;
        }
        m_text.push_back(next);
        return false;
    }
    return true;
}
// Consume characters from 'stream' up to and including the next '>',
// appending each one to m_text.
//
// Once the three characters preceding the current one are "!--", the tag is
// treated as a comment and a '<' inside it is accepted.  Outside a comment, a
// '<' before the closing '>' means the tag was never terminated.
//
// Returns true if a '>' was read, false if the stream was exhausted first.
// Throws Unterminated_Tag if a '<' is found outside a comment.
bool XML_Tag::read_to_tag_end(std::ifstream &stream)
{
    // Sliding window over the input: recent[0] receives the character being
    // read and recent[1..3] hold the three characters read before it.
    std::deque<char> recent = {'\0', '\0', '\0', '\0'};
    bool in_comment = false;
    while (get_next_char(stream, recent.front()))
    {
        const char current = recent.front();
        if (recent[3] == '!' && recent[2] == '-' && recent[1] == '-')
            in_comment = true;
        if (current == '<' && !in_comment)
            throw Unterminated_Tag(get_lines(), m_text, false);
        m_text.push_back(current);
        if (current == '>')
            return true;
        // Shift the window by one and discard the oldest entry.  Only four
        // characters are ever inspected, so the history must not grow with
        // the length of the tag.
        recent.push_front('\0');
        recent.pop_back();
    }
    return false;
}
// Classify the tag held in m_text by looking at the characters just inside
// its angle brackets.
//
//   "<?" ... "?>"  PROCESSING_INSTRUCTION
//   "<!--" ...     COMMENT (the rest of the comment is consumed from 'stream')
//   "</" ...       END
//   ... "/>"       EMPTY
//   anything else  START
//
// m_text must contain a complete tag, i.e. at least "<" and ">".
// May throw Unterminated_Tag (via eat_comment) if a comment is never closed.
Vamos_Media::XML_Tag::Tag_Type XML_Tag::find_tag_type(std::ifstream &stream)
{
    // Short tags such as "<a>" are valid, so only require the brackets.
    assert(m_text.size() >= 2);
    const char last = *(m_text.end() - 2);

    // A processing instruction needs room for both '?' characters; without
    // the length check "<?>" would match with a single '?' playing both roles
    // and the text boundaries computed for it would cross.
    if (m_text.size() >= 4 && m_text[1] == '?' && last == '?')
        return PROCESSING_INSTRUCTION;
    if (m_text[1] == '!' && m_text[2] == '-' && m_text[3] == '-')
    {
        eat_comment(stream);
        return COMMENT;
    }
    if (m_text[1] == '/')
        return END;
    if (last == '/')
        return EMPTY;
    return START;
}
// Return the tag's label: the characters from 'text_start' up to the first
// space, or up to 'text_end' if the range contains no space.
std::string XML_Tag::find_label(Str_Iter text_start, Str_Iter text_end)
{
    const Str_Iter label_end = std::find(text_start, text_end, ' ');
    return std::string(text_start, label_end);
}
// Read one character from 'stream' into 'ch' and count it in m_lines if it is
// a newline.  'ch' is reset to '\0' before the read.
//
// Returns 'stream' so that the caller can test it to see whether the read
// succeeded.
std::ifstream &XML_Tag::get_next_char(std::ifstream &stream, char &ch)
{
    ch = '\0';
    stream.get(ch);
    const bool line_ended = (ch == '\n');
    if (line_ended)
        ++m_lines;
    return stream;
}
// Consume characters from 'stream' until the comment terminator "-->" has
// been read.  The characters are discarded; only the line count kept by
// get_next_char() is updated.
//
// Returns true if the terminator was found, false if the stream was exhausted
// first.
bool XML_Tag::find_comment_end(std::ifstream &stream)
{
    // Sliding window over the input: recent[0] receives the character being
    // read and recent[1..2] hold the two characters read before it.
    std::deque<char> recent = {'\0', '\0', '\0'};
    while (get_next_char(stream, recent.front()))
    {
        if (recent[0] == '>' && recent[1] == '-' && recent[2] == '-')
            return true;
        // Shift the window by one and discard the oldest entry.  Only three
        // characters are ever inspected, so the history must not grow with
        // the length of the comment.
        recent.push_front('\0');
        recent.pop_back();
    }
    return false;
}
// Make sure the whole comment has been consumed from 'stream'.
//
// m_text was read up to the first '>'.  If that '>' is preceded by "--" it is
// the comment terminator and nothing more needs to be read.  Otherwise the
// '>' was part of the comment body and the rest of the comment is skipped.
//
// Throws Unterminated_Tag if the stream ends before "-->" is found.
void XML_Tag::eat_comment(std::ifstream &stream)
{
    const auto closer = m_text.size() - 1;
    const bool already_complete = (m_text[closer - 1] == '-' && m_text[closer - 2] == '-');
    if (already_complete || find_comment_end(stream))
        return;
    throw Unterminated_Tag(get_lines(), m_text, true);
}
// Advance 'text_start' past any run of space characters.
//
// There is no end-of-range check: the loop stops only at the first character
// that is not a space, so the caller must ensure one exists.
void XML_Tag::skip_spaces(Str_Iter &text_start)
{
    for (; *text_start == ' '; ++text_start)
    {
    }
}
// Extract the attributes that follow the label and append them to
// m_attributes.
//
// 'text_start' and 'text_end' are the boundaries produced by
// get_text_boundries(); m_label must already be set.  Each attribute is taken
// to run from the first non-space character through the closing quote of its
// value.
//
// Throws Unterminated_Attribute if an opening quote has no matching closing
// quote.
void XML_Tag::find_attributes(Str_Iter text_start, Str_Iter text_end)
{
    // Step over the label.  If nothing follows it there are no attributes.
    text_start += m_label.size();
    if (text_start == text_end)
        return;

    // Extend the searched range by one character.
    ++text_end;
    for (;;)
    {
        skip_spaces(text_start);
        const auto open_quote = std::find(text_start, text_end, '"');
        if (open_quote == text_end)
            break;
        const auto close_quote = std::find(open_quote + 1, text_end, '"');
        if (close_quote == text_end)
            throw Unterminated_Attribute(get_lines(), std::string(text_start, text_end), true);
        m_attributes.push_back(get_attribute(text_start, close_quote));
        text_start = close_quote + 1;
    }
}
// Split one attribute of the form name="value" into its name and value.
//
// 'text_start' points at the first character of the name and 'text_end' at
// the closing quote of the value.  The name is everything before the '='.
// The value starts two characters after the '=' position, i.e. the '=' is
// assumed to be followed immediately by the opening quote.
//
// NOTE(review): if the range contains no '=', the value start is computed
// past 'text_end' -- confirm that callers can never pass such a range.
Vamos_Media::XML_Tag::Attribute XML_Tag::get_attribute(Str_Iter text_start, Str_Iter text_end)
{
    const auto equals = std::find(text_start, text_end, '=');
    std::string name(text_start, equals);

    const auto value_begin = equals + 2;
    const auto value_end = std::find(value_begin, text_end, '"');
    std::string val(value_begin, value_end);

    return Attribute(name, val);
}
//----------------------------------------------------------------------------------------
// Return the last 'n' '/'-separated elements of the path, joined by '/'.
//
// The result is empty when 'n' is zero, and is the whole path when the path
// contains fewer than 'n' separators.
std::string XML_Path::subpath(size_t n) const
{
    auto cut = m_path.length() - 1;
    auto head = m_path;
    for (size_t remaining = n; remaining > 0; --remaining)
    {
        // Strip one trailing element per pass, remembering where it began.
        cut = head.rfind('/');
        head = head.substr(0, cut);
    }
    return m_path.substr(cut + 1);
}
// Split 'in' at every '*' and return the pieces in order.
//
// The separators are not included.  Empty pieces are kept, so the result
// always has exactly one more element than there are '*' characters in 'in'.
std::vector<std::string> split(std::string in)
{
    std::vector<std::string> pieces;
    size_t from = 0;
    for (size_t star = in.find('*'); star != std::string::npos; star = in.find('*', from))
    {
        pieces.push_back(in.substr(from, star - from));
        from = star + 1;
    }
    // Whatever follows the last '*' (possibly nothing) is the final piece.
    pieces.push_back(in.substr(from));
    return pieces;
}
// Test whether the path matches 'pattern', in which each '*' stands for any
// run of characters (including none).
//
// Without a '*' the pattern must equal the path exactly.  Otherwise the text
// before the first '*' must start the path, the text after the last '*' must
// end it, and the pieces in between must occur in order without overlapping.
bool XML_Path::match(std::string pattern) const
{
    const std::vector<std::string> pieces = split(pattern);
    assert(!pieces.empty());

    // No wildcard: only an exact match will do.
    if (pieces.size() == 1)
        return m_path == pattern;

    // Locate every piece except the last, scanning left to right.
    size_t cursor = 0;
    for (size_t i = 0; i + 1 < pieces.size(); ++i)
    {
        const size_t found = m_path.find(pieces[i], cursor);
        if (found == std::string::npos)
            return false;
        // The first (possibly empty) piece is anchored to the beginning.
        if (i == 0 && found > 0)
            return false;
        cursor = found + pieces[i].size();
    }

    // The last (possibly empty) piece is anchored to the end.
    const std::string &tail = pieces.back();
    const size_t tail_pos = m_path.rfind(tail);
    if (tail_pos == std::string::npos)
        return false;
    if (tail_pos + tail.size() != m_path.size())
        return false;

    // It must also start at or after the end of the earlier matches.
    return tail_pos >= cursor;
}
//** Class XML_Parser
// Construct a parser with no input stream; read() creates the stream.
XML_Parser::XML_Parser()
    : mp_stream(nullptr)
{
}
// Destroy the input stream, if one was created.
XML_Parser::~XML_Parser()
{
    delete mp_stream;
}
// Open 'file' and parse it as an XML document, invoking the callbacks as tags
// are read.
//
// Throws No_XML_File if the file cannot be opened, and Tag_Mismatch if a tag
// is left unterminated or if elements are still open when the document ends.
// Exceptions raised while reading the document other than XML_Unterminated
// propagate unchanged.
void XML_Parser::read(std::string file)
{
    m_file = file;

    // Release the stream left over from an earlier call so that reading a
    // second file does not leak it.  The pointer is cleared first so that the
    // destructor cannot delete it again should the allocation below throw.
    delete mp_stream;
    mp_stream = nullptr;

    // A failed 'new' throws rather than returning null, so only the state of
    // the stream needs checking.
    mp_stream = new std::ifstream(file.c_str());
    if (!*mp_stream)
        throw No_XML_File(m_file);

    m_line = 1;
    try
    {
        read_document();
    }
    catch (XML_Unterminated &unterminated)
    {
        // Always rethrows as a Tag_Mismatch.
        handle_unterminated(unterminated);
    }

    // Anything still on the path is an element that was never closed.
    if (!m_path.empty())
    {
        std::string message = "Unterminated \"<" + m_path.top() + ">\" tag";
        throw Tag_Mismatch(m_file, -1, message);
    }
}
// Report a problem at the current position in the input by throwing an
// XML_Exception that carries the file name, the current line number and
// 'message'.  Never returns.
void XML_Parser::error(std::string message)
{
    throw XML_Exception(m_file, m_line, message);
}
// Read the XML declaration.
void XML_Parser::check_declaration()
{
XML_Tag tag(*mp_stream);
m_line += tag.get_lines();
if (tag.get_type() != XML_Tag::PROCESSING_INSTRUCTION || tag.get_label() != "xml")
throw No_Declaration(m_file, m_line, "XML declaration is missing");
}
// Invoke the callbacks appropriate for 'tag'.
//
//   START    on_data() with the preceding character data, then on_start_tag()
//   END      on_data() with the preceding character data, then on_end_tag()
//   EMPTY    on_start_tag() followed by on_end_tag()
//   COMMENT  nothing
//
// Returns true for a tag of type NONE, which tells the caller to stop
// reading, and false otherwise.  Throws Bad_Tag_Type for any other tag type.
bool XML_Parser::run_callbacks(const XML_Tag &tag)
{
    const auto type = tag.get_type();
    if (type == XML_Tag::NONE)
        return true;
    if (type == XML_Tag::COMMENT)
        return false;

    if (type == XML_Tag::START)
    {
        on_data(tag.get_data());
        on_start_tag(tag);
    }
    else if (type == XML_Tag::END)
    {
        on_data(tag.get_data());
        on_end_tag(tag);
    }
    else if (type == XML_Tag::EMPTY)
    {
        on_start_tag(tag);
        on_end_tag(tag);
    }
    else
    {
        std::string message = '"' + tag.get_text() + "\" is an unrecognized tag";
        throw Bad_Tag_Type(m_file, m_line, message);
    }
    return false;
}
// Parse the whole document: check the XML declaration, then read tags one at
// a time until run_callbacks() reports that reading is finished.
//
// Start and empty tags are pushed onto the current path before the callbacks
// run; end and empty tags are popped from it afterwards, so the callbacks for
// an element always see that element on the path.
void XML_Parser::read_document()
{
    check_declaration();

    bool finished = false;
    do
    {
        XML_Tag tag(*mp_stream);
        m_line += tag.get_lines();

        const auto type = tag.get_type();
        const bool opens_element = (type == XML_Tag::START) || (type == XML_Tag::EMPTY);
        const bool closes_element = (type == XML_Tag::END) || (type == XML_Tag::EMPTY);

        if (opens_element)
            add_tag(tag);
        finished = run_callbacks(tag);
        if (closes_element)
            remove_tag(tag);
    } while (!finished);
}
// Push the label of 'tag' onto the path of currently open elements.
void XML_Parser::add_tag(const XML_Tag &tag)
{
    m_path.push(tag.get_label());
}
// Pop the innermost open element from the path, after checking that 'tag'
// closes that element.
//
// Throws Tag_Mismatch if the label of 'tag' differs from the element on top
// of the path.
void XML_Parser::remove_tag(const XML_Tag &tag)
{
    if (tag.get_label() != m_path.top())
    {
        std::string message = "Expected </";
        message += m_path.top();
        message += "> but found </";
        message += tag.get_label();
        message += '>';
        throw Tag_Mismatch(m_file, m_line, message);
    }
    m_path.drop();
}
// Convert an XML_Unterminated error into a Tag_Mismatch that names the
// missing delimiter and the text it was expected to close.  Always throws.
//
// The reported text is cut at its first newline and the newlines it contained
// are taken off the line count.  The reported line is -1 when the error was
// detected at the end of the file.
void XML_Parser::handle_unterminated(XML_Unterminated &unterminated)
{
    auto &text = unterminated.text;

    // If the tag ends with \n, almost certainly a '>' was forgotten on the
    // previous line, so report only the first line of the text and do not
    // count the lines that were swallowed along with it.
    unterminated.lines -= std::count(text.begin(), text.end(), '\n');
    const auto first_newline = std::find(text.begin(), text.end(), '\n');
    text = std::string(text.begin(), first_newline);

    std::ostringstream message;
    message << '"' << unterminated.delimiter << "\" is missing for \"" << text << '"';

    if (!unterminated.eof)
        m_line += unterminated.lines;
    else
        m_line = -1;
    throw Tag_Mismatch(m_file, m_line, message.str());
}