/***************************************************************************
ltxfilelex.ll - rules for reading EQC commands from a ltx file
- scanner generation file for flex
-------------------
begin : Sun Oct 21 2001
copyright : (C) 2001 by Jan Rheinlaender
email : jrheinlaender@users.sourceforge.net
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************/
%{
#include "ltxfilelex.h"
// The following includes are necessary for ltxfileparse.h
#include "equation.h"
#include "ltxfileparse.h"
#include <stdio.h>
#include "unit.h"
#include "func.h" // *** added in 0.5
#include "msgdriver.h" // *** added in 1.3.1
#include "utils.h" // *** added in 0.8
#include "optstack.h" // *** added in 1.2
// This can also be done by calling flex with --nounistd *** added in 1.4.1
#ifdef _MSC_VER
#define YY_NO_UNISTD_H
#endif
// NOTE(review): nothing in this file reads or writes scanline (see the TODO below)
std::string scanline = ""; // for locating error positions *** added in 0.8, TODO: Unused!!!
// Current input line number; starts at 1 and is incremented by the rules for every newline they consume
int linenumber = 1; // for counting the lines
// Delimiter of the current \verb construct: stored by the PROC_KW \verb rule and compared
// against every character scanned in state VERB
char verbatim_delimiter; // *** added in 0.4
// *** Moved definition of eqcpath to compiler in 1.1
// List of keywords that create equations and expressions
// *** added \eqeval, \eqevalp and \tseries in 1.0
// *** removed floor and ceil in 1.2
// eqstm lists the macros of statements that create an equation. The SCANEX name rule looks up
// scanned macros in eq_statement (via set_has) and, on a hit, rescans them in keyword mode.
const char *eqstm[13] = {"\\eq","\\eqadd","\\eqsub","\\eqmul","\\eqdiv","\\eqpow","\\eqfunc",
"\\eqdiff","\\eqsubst","\\eqsubstc", "\\eqrev","\\eqsimpf",
"\\eqsolve"};
std::set<const char*, ltstr> eq_statement(eqstm, eqstm+13);
// Macros of statements that return an expression. The table feeds the ex_statement lookup set,
// which the SCANEX name rule queries with the scanned text. That text always includes the
// leading backslash of a macro, so every entry must carry it too (the "\\numvalwith" entry
// was missing it and therefore could never match).
const char *exstm[15] = {"\\lhs","\\rhs","\\val","\\valwith","\\quantity","\\quantitywith",
                         "\\units","\\unitswith","\\numval","\\numvalwith",
                         "\\numer","\\denom", "\\eqeval", "\\eqevalp", "\\tseries"};
// Lookup set over the 15 entries of exstm; queried with set_has() by the SCANEX name rule.
// NOTE(review): ltstr is declared elsewhere; presumably it orders the C strings by content - confirm.
std::set<const char*, ltstr> ex_statement(exstm, exstm+15);
// Stack for keeping track of scanning states. states.top()->start always is the current
// start condition
// One frame of that stack: the flex start condition plus the bookkeeping the rules need while
// the frame is active. A fresh frame is fully initialized, including start.
struct staterec {
    int start; // The flex start condition
    std::string argument; // Text accumulated so far in this state *** added in 0.6
    int bracketlevel; // Nesting depth of open brackets *** replaces fbracketstack in 0.8
    bool implicit_mul_might_follow; // Could an implicit multiplication follow after this token?
    bool last_was_newline; // Avoid producing empty lines, e.g. \eq*{x=3} on a line by itself would do that *** added in 1.4.3
    bool asterisk_statement; // Set through set_asterisk_statement() *** added in 1.4.3
    int last_token; // Stores the last token in expression scanning mode

    staterec()
        : start(0), // 0 is flex's INITIAL start condition (its macro is not yet defined at this point)
          argument(),
          bracketlevel(0),
          implicit_mul_might_follow(false),
          last_was_newline(false),
          asterisk_statement(false),
          last_token(' ')
    {}
};
// Frames are allocated with new in enter_state() and released in exit_state()
std::stack<staterec*> states;
staterec* state; // Contains the current state to avoid using states.top() all the time
// Note: An implicit multiplication occurs when the multiplication sign '*' is omitted, i.e.
// '3x+4y' has two implicit multiplications. Parsing this is difficult because adding a rule
// like exp exp -> exp leads to numerous shift/reduce conflicts. The way to avoid it is to add
// some ugly hacks to the scanner. The logic behind these hacks is:
// An implicit multiplication might follow the tokens SYMBOL, DIGIT, UNIT, MACRO or any kind of closing bracket
// It will only follow if the next token is SYMBOL, MACRO, DIGIT, UNIT, FUNC, FRAC, VALUE or any kind
// of opening bracket.
// There are some exceptions:
// - No implicit mul may occur between a function name and an opening bracket. Note that this
// case is ambiguous! EQC assumes that any opening bracket after a function name belongs to
// the function
// In case an implicit mul is detected, the scanned token is put back on the input stream and IMPMUL returned instead.
// *** corrected bug which parsed 230 1/\s as 2301/s in 0.9
// Forward declaration of helper functions
// Both helpers are defined in the user code section at the end of this file
bool check_implicit_mul(const int token);
void remove_space(std::string &what);
%}
%x PROC_KW VERB VERBATIM SCANSTR SCANQSTR SCANEX
%option noyywrap
%option never-interactive
/* added in 1.4.1 for MSVC compile */
/* Unicode, see
http://groups.google.com/group/comp.compilers/browse_thread/thread/2d2bff9c324d8e05
added in 1.4.3 */
/* UTF-8 byte order mark (bytes EF BB BF); matched by the first rule of the rules section */
UTF8_BYTE_ORDER_MARK [\xEF][\xBB][\xBF]
/* NOTE(review): despite its name, ASCII spans every byte value 0x00-0xFF, so UNICODE matches
any single byte. Confirm whether 0x00-0x7F was intended. UNICODE is not referenced by any
rule in this file. */
ASCII [\x0-\xFF]
/* U2: two-byte sequences; U3-U6: three-byte sequences; U7-U9: four-byte sequences,
each split by the range of the lead byte */
U2 [\xC2-\xDF][\x80-\xBF]
U3 [\xE0][\xA0-\xBF][\x80-\xBF]
U4 [\xE1-\xEC][\x80-\xBF][\x80-\xBF]
U5 [\xED][\x80-\x9F][\x80-\xBF]
U6 [\xEE-\xEF][\x80-\xBF][\x80-\xBF]
U7 [\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF]
U8 [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
U9 [\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF]
UNICODE {ASCII}|{U2}|{U3}|{U4}|{U5}|{U6}|{U7}|{U8}|{U9}
/* Adding [a-zA-Z\%] at the end might seem superfluous, but it isn't: the preceding {-}
differences (notably [[:graph:]], [[:print:]] and [[:punct:]]) also remove the ASCII
letters and the percent sign, so they have to be added back with {+}. */
LASCII [\x0-\xFF]{-}[[:blank:]]{-}[[:cntrl:]]{-}[[:digit:]]{-}[[:graph:]]{-}[[:print:]]{-}[[:punct:]]{-}[[:space:]]{+}[a-zA-Z\%]
/* A "letter" in the wide sense: anything in LASCII or a multi-byte UTF-8 sequence */
LUNICODE {LASCII}|{U2}|{U3}|{U4}|{U5}|{U6}|{U7}|{U8}|{U9}
/* For scanning expressions */
/* NOTE(review): LETTER is not referenced by any rule in this file; the rules use ULETTER */
LETTER [a-zA-Z]|\%
ULETTER {LUNICODE}
DIGIT [0-9]
/* One or more letters; used for macro names after a backslash */
NAME {ULETTER}+
/* Three forms: _{...} (anything up to the first closing brace), _letter and _digit */
SUBSCRIPT _\{[^}]*\}|_{ULETTER}|_{DIGIT}
/* A subscript is enclosed in {} and can contain everything except closing brackets.
TODO: This definition should correspond with what Latex allows in a subscript!!! */
/* For scanning keywords */
/* Single characters that the PROC_KW separator rule returns to the parser as themselves */
SEPARATOR [*"=&;{[}\]]
/* *** added bracket definitions in 0.6 (copied from lteqlex.ll) */
/* LBSIZE/RBSIZE: size modifiers that may precede an opening/closing bracket.
RBSIZE mirrors LBSIZE; the previous text of RBSIZE required a '|' after \bigr and
listed Bigr without its backslash, so \bigr and \Bigr were not recognized. */
LBSIZE \\left|\\bigl|\\Bigl|\\biggl|\\Biggl
RBSIZE \\right|\\bigr|\\Bigr|\\biggr|\\Biggr
/* BOPEN/BCLOSE: the bracket characters themselves, escaped braces and the named delimiters */
BOPEN [\{\(\[\|]|\\\{|\\lbrace|\\lbrack|\\lfloor|\\lceil|\\langle
BCLOSE [\}\)\]\|]|\\\}|\\rbrace|\\rbrack|\\rfloor|\\rceil|\\rangle
%%
%{
    // Executed on every entry into the scanning routine: restore the start condition of the
    // innermost state frame (INITIAL when the state stack is empty). *** added in 0.8
    // The code is enclosed in %{ %} so that it is valid regardless of its indentation.
    if (states.empty()) {
        BEGIN(INITIAL);
    } else {
        BEGIN(state->start);
    }
    MSG_INFO(10) << "Start scanning in state: " << YY_START << endline;
    if (YY_START == SCANEX) {
        MSG_INFO(10) << "Implicit mul might follow: "
                     << (state->implicit_mul_might_follow ? "true" : "false") << endline;
    }
%}
{UTF8_BYTE_ORDER_MARK} state->argument += yytext;
%{
// =================== state INITIAL (scan_stm) =========================
// Scanning up to the next statement
// Enter: Once right at the beginning of the input file
// Exit: At the end of the file
// Nesting: Enters state keyword when a macro is found. The macro is put back to be re-scanned.
// Returns STRING if any text was accumulated in state->argument
// Else goes on scanning
// Returns: ENDFILE when EOF is found. This either finishes scanning or goes on scanning the
// file which included this one
// *** added in 0.6, moved to top and changed from SCANSTM to INITIAL in 0.8
%}
\\[[:alpha:]]+ {
// A macro was found. It is always pushed back and rescanned in keyword mode. If plain text
// was accumulated before it, that text is returned as STRING first.
yyless(0); // rescan the macro
if (state->argument != "") { // There is some text to be output first
// The first two branches drop a leading newline (plain or after a '%') when the previous text
// ended in a newline and an asterisk statement was flagged, to avoid an empty output line
if ((state->argument[0] == '\n') && state->last_was_newline && state->asterisk_statement) {
MSG_INFO(10) << "Skipping double newline" << endline;
state->argument.erase(0,1);
state->last_was_newline = true; // This is still true!
state->asterisk_statement = false;
} else if ((state->argument.length() > 1) && (state->argument[0] == '%') && (state->argument[1] == '\n')
&& state->last_was_newline && state->asterisk_statement) {
MSG_INFO(10) << "Skipping double commented newline" << endline;
state->argument.erase(0,2);
state->last_was_newline = true;
state->asterisk_statement = false;
} else if (state->argument[state->argument.length() - 1] == '\n') { // *** added in 1.4.3
// Remember that the text ends in a newline for the next time this rule runs
MSG_INFO(10) << "Found ending newline" << endline;
state->last_was_newline = true;
state->asterisk_statement = false;
} else {
state->last_was_newline = false;
state->asterisk_statement = false;
}
// The text is copied before enter_state() replaces the current state frame
yylval.str = new std::string(state->argument);
MSG_INFO(10) << "Found STRING '" << *yylval.str << "'" << endline;
state->argument = ""; // Clear argument for continued scanning after the keyword!
enter_state(keyword); // *** changed from BEGIN(INITIAL) in 0.8
return (STRING); // *** changed ARGUMENT to STRING in 0.8
}
// No pending text: no token is returned here, scanning continues with the pushed-back macro
enter_state(keyword); // just go on scanning in keyword mode
}
\\|\\% state->argument += yytext; // handle lone backslashes and escaped percent signs
\n state->argument += yytext; linenumber++;
[^\\\n\%]* state->argument += yytext;
^\%.*\n {
// A comment that starts at the beginning of a line: kept in the pending text only while the
// o_comments option is false
if (!(optstack::options->get(o_comments).boolean))
state->argument += yytext; // Otherwise clean comment lines from output file
linenumber++;
}
\%.*\n state->argument += yytext; linenumber++;
<<EOF>> { // *** removed BEGIN(INITIAL) in 0.8
// Hand the text that is still pending to the parser together with ENDFILE
state->argument += yytext; // TODO: Is this necessary?
yylval.str = new std::string(state->argument);
return(ENDFILE);
}
%{
// =================== state PROC_KW (keyword) =======================
// Enter: When scan_stm finds a macro
// Exit: When the parser finishes with the statement including all arguments
// Nesting: To scan_str for RENEWCOMMAND or scan_ex, depending on the statement
// Note: scan_ex might change to keyword again, resulting in nesting of keywords
// Returns: The keyword when the macro is a keyword
// MACRO if it isn't, then the parser exits to scan_stm immediately
// MACRO which occurs inside arguments to a statement
// WORD (pure text string) which occurs inside arguments
// A separator if one is found
// Changes: Exits and enters verb when \verb is found
// Exits and enters verbatim if the BEGIN statement finds a verbatim environment
// ------------------- Comments ----------------------------
// *** comment handling split up into the different states in 0.7
// *** changed from INITIAL to PROC_KW in 0.8
%}
<PROC_KW>\%.*\n { // Comments can always be eaten since all macros are returned directly
// to the compiler, so they can not accidentally concatenate with following text
linenumber++;
}
%{
// ------------------- Keywords ----------------------------
// *** removed token SPACE in 0.6
// ------------------- Keywords for statements that return an equation
// The EQOP and EQSUBST rules pass the name of the operation to the parser in yylval.str.
// NOTE(review): the strings are allocated with new and never freed in this file; presumably
// the parser takes ownership - confirm.
// Longer macros such as \eqadd are not mistaken for \eq because flex prefers the longest match.
%}
<PROC_KW>\\eq return(EQUATION);
<PROC_KW>\\eqadd yylval.str = new std::string("add"); return(EQOP);
<PROC_KW>\\eqsub yylval.str = new std::string("sub"); return(EQOP);
<PROC_KW>\\eqmul yylval.str = new std::string("mul"); return(EQOP);
<PROC_KW>\\eqdiv yylval.str = new std::string("div"); return(EQOP);
<PROC_KW>\\eqpow yylval.str = new std::string("pow"); return(EQOP); // *** added in 0.6
<PROC_KW>\\eqfunc yylval.str = new std::string("func"); return(EQOP); // *** added in 0.9
<PROC_KW>\\eqdiff yylval.str = new std::string("diff"); return(EQOP); // *** added in 0.6
<PROC_KW>\\eqsubst yylval.str = new std::string("subst"); return (EQSUBST); // *** changed in 0.9
<PROC_KW>\\eqsubstc yylval.str = new std::string("substc"); return (EQSUBST); // *** added in 0.9
<PROC_KW>\\eqrev return (EQREV);
<PROC_KW>\\eqsimpf return (EQSIMPF); // *** added in 0.6
<PROC_KW>\\eqsolve return(EQSOLVE); // *** added in 0.8
%{
// ------------------- Keywords for statements that return an expression
// *** added tokens ...with in 1.2, removed ceil and floor
// The ...with variants pass the same string as their plain counterpart (e.g. \valwith passes
// "\val") and differ only in the token: VALUEWITH instead of VALUE.
%}
<PROC_KW>\\val yylval.str = new std::string("\\val"); return(VALUE);
<PROC_KW>\\quantity yylval.str = new std::string("\\quantity"); return(VALUE);
<PROC_KW>\\units yylval.str = new std::string("\\units"); return(VALUE);
<PROC_KW>\\numval yylval.str = new std::string("\\numval"); return(VALUE);
<PROC_KW>\\valwith yylval.str = new std::string("\\val"); return(VALUEWITH);
<PROC_KW>\\quantitywith yylval.str = new std::string("\\quantity"); return(VALUEWITH);
<PROC_KW>\\unitswith yylval.str = new std::string("\\units"); return(VALUEWITH);
<PROC_KW>\\numvalwith yylval.str = new std::string("\\numval"); return(VALUEWITH);
<PROC_KW>\\lhs yylval.str = new std::string("\\lhs"); return(LHSRHS);
<PROC_KW>\\rhs yylval.str = new std::string("\\rhs"); return(LHSRHS);
<PROC_KW>\\numer return(NUMER); // *** numer and denom added in 0.8
<PROC_KW>\\denom return(DENOM);
<PROC_KW>\\eqeval return(EQEVAL); // *** added in 1.0
<PROC_KW>\\eqevalp return(EQEVALP); // *** added in 1.0
<PROC_KW>\\tseries return(TSERIES); // *** added in 1.0
%{
// ------------------- Other keywords
// *** moved preferredunits, precision, precision_type, scientific_limits, eqlang and eqpath to key-value lexing
// *** replaced printoptions by eqcoptions in 1.1
// None of these rules sets yylval; only the token is returned
%}
<PROC_KW>\\printeq return (PRINT); // *** added in 0.6, removed return type str in 0.8
<PROC_KW>\\printvector return (PRINTV); // *** added in 1.0
<PROC_KW>\\defunit return (DEFUNIT);
<PROC_KW>\\deleq return (DELETE); // *** added in 0.6
<PROC_KW>\\constant return (CONSTANT);
<PROC_KW>\\function return (FUNCTION); // *** added in 0.5
<PROC_KW>\\matrix return (MATRIX); // *** added in 1.4.3
<PROC_KW>\\deffunc return (DEFFUNC); // *** added in 0.5
<PROC_KW>\\eqcoptions return (EQCOPTIONS); // *** added in 1.1
<PROC_KW>\\dumpeq return (DUMP); // *** added in 0.8
<PROC_KW>\\usepackage return (USEPACKAGE);
<PROC_KW>\\clearequations return (CLEAREQUATIONS);
<PROC_KW>\\input return (INPUT);
%{
// Environments
// *** added in 0.8
%}
<PROC_KW>\\begin return (BEGINENV);
<PROC_KW>\\end return (ENDENV);
%{
// Latex commands that are ignored to avoid mistakenly parsing them *** added in 0.4
%}
<PROC_KW>\\renewcommand return (RENEWCOMMAND); // *** added in 0.4 TODO: handle this with enter_state(scan_stm)?
%{
// This must be here because of the later catch-all rule for macros
%}
<PROC_KW>\\verb. {
// \verb plus its delimiter is copied to the output unchanged; the character matched by '.'
// is remembered as the delimiter that ends the verbatim text
output.write (yytext, yyleng);
verbatim_delimiter = yytext[yyleng - 1];
exit_state(); // stop scanning this keyword
enter_state(verb); // *** changed to enter_state() in 0.8
}
%{
// ---------------------- Separators and text ------------------
// The double quote is also part of SEPARATOR, but the quote rule comes first and therefore
// wins: a quote always starts a quoted string
%}
<PROC_KW>\" { enter_state(scan_qstr); } // *** added in 1.2
<PROC_KW>{SEPARATOR} {
// The separator character itself is the token value
MSG_INFO(10) << "Found separator: " << yytext << endline;
return (yytext[0]);
}
%{
// Names for key-value pairs *** added in 1.1
// *** added difftype in 1.4.1
// Option names store the option identifier in yylval.opt; the value words true/false and
// onlyleft/both/none store their value in yylval.boolean and yylval.align respectively.
// NOTE(review): the token suffix (OPT_L, OPT_E, OPT_B, OPT_S, OPT_A) presumably encodes the
// kind of value the parser expects for the option - confirm against the grammar.
// These rules precede the WORD rule, so they win when both match the same text.
%}
<PROC_KW>units yylval.opt = o_units; return(OPT_L);
<PROC_KW>precision yylval.opt = o_precision; return(OPT_E);
<PROC_KW>fixeddigits yylval.opt = o_fixeddigits; return(OPT_B);
<PROC_KW>lowsclimit yylval.opt = o_lowsclimit; return(OPT_E);
<PROC_KW>highsclimit yylval.opt = o_highsclimit; return(OPT_E);
<PROC_KW>lang yylval.opt = o_lang; return(OPT_S);
<PROC_KW>path yylval.opt = o_path; return(OPT_S);
<PROC_KW>eqparse yylval.opt = o_eqparse; return(OPT_B);
<PROC_KW>eqraw yylval.opt = o_eqraw; return(OPT_B);
<PROC_KW>eqchain yylval.opt = o_eqchain; return(OPT_B);
<PROC_KW>eqalign yylval.opt = o_eqalign; return(OPT_A);
<PROC_KW>eqginac yylval.opt = o_eqginac; return(OPT_B);
<PROC_KW>eqsplit yylval.opt = o_eqsplit; return(OPT_E);
<PROC_KW>eqsplittext yylval.opt = o_eqsplittext; return(OPT_S);
<PROC_KW>vecautosize yylval.opt = o_vecautosize; return(OPT_E);
<PROC_KW>difftype yylval.opt = o_difftype; return(OPT_S);
<PROC_KW>label return(O_LABEL);
<PROC_KW>save return(SAVE);
<PROC_KW>restore return(RESTORE);
<PROC_KW>true yylval.boolean = true; return(BOOL);
<PROC_KW>false yylval.boolean = false; return(BOOL);
<PROC_KW>onlyleft yylval.align = onlyleft; return(ALIGN); // *** added in 1.2
<PROC_KW>both yylval.align = both; return(ALIGN);
<PROC_KW>none yylval.align = none; return(ALIGN);
<PROC_KW>debug yylval.opt = o_debug; return(OPT_E); // *** added in 1.4.3
<PROC_KW>cleancomments yylval.opt = o_comments; return(OPT_B); // *** added in 1.4.3
<PROC_KW>tan_is_tg yylval.opt = o_tan; return(OPT_B); // *** added in 1.4.3
%{
// Other words
%}
<PROC_KW>([^\\%*"=&;{[}\]\t\n ]|\\%)+ {
// A WORD can contain anything that is not a SEPARATOR, a MACRO,
// a comment or a space. Note that we have to make provision for
// escaped % signs
// The matched text is passed to the parser unchanged in yylval.str
yylval.str = new std::string(yytext);
MSG_INFO(10) << "Found word: " << yytext << endline;
return (WORD);
}
<PROC_KW>\\[[:alpha:]]+ { // any other macro names
// Reached for macros that none of the keyword rules above matches; the name is passed
// including its backslash
yylval.str = new std::string(yytext);
MSG_INFO(10) << "Found other macro " << yytext << endline;
return (MACRO);
}
<PROC_KW>[[:space:]]+ { // Whitespace yields no token; only its newlines are counted. TODO: Use find()?
    const char* const stop = yytext + yyleng;
    for (const char* ch = yytext; ch != stop; ++ch) {
        if (*ch == '\n')
            linenumber++;
    }
}
<PROC_KW>\\ { // any remaining backslashes TODO: Is this possible?
// Only reached for a backslash that is followed neither by a letter nor by a percent sign;
// it is passed on as a one-character WORD
yylval.str = new std::string(yytext);
return (WORD);
}
%{
// =============== Verbatim environments (verb,verbatim) =================
// Enter: From keyword
// Exit: When environment is finished
// *** added in 0.4
// *** moved recognition of \begin{verbatim} to PROC_KW in 0.8
// All text scanned in these two states is written to output unchanged; no token is returned
%}
<VERB,VERBATIM>\n { // *** changed order of this and following rule in 0.6
output << std::endl;
linenumber++;
}
<VERB>. { output.put(yytext[0]);
// The delimiter itself is written as well before the state is left
if (yytext[0] == verbatim_delimiter) {
// *** changed INITIAL to SCANSTM in 0.6, changed to exit_state() in 0.8
exit_state();
}
}
<VERBATIM>\\end[[:space:]]*\{[[:space:]]*verbatim[[:space:]]*\} { // TODO: comments inside the \end{...} are not possible
// *** changed INITIAL to SCANSTM in 0.6, changed to exit_state() in 0.8
output.write(yytext, yyleng);
exit_state();
}
<VERBATIM>.* output.write(yytext, yyleng);
%{
// =================== states SCANSTR (scan_str) ====================
// Enter: From RENEWCOMMAND keyword
// Exit: When the final closing bracket is found (returns STRING)
// *** added in 0.6, major rewrite in 0.8, now largely replaced by SCANEX
// The raw text is collected in state->argument; state->bracketlevel starts at 1 (set by
// enter_state) and tracks the nesting so that the final closing bracket can be recognized
%}
<SCANSTR>\n state->argument += yytext; linenumber++;
<SCANSTR>{LBSIZE}{BOPEN}|{BOPEN} { // Opening brackets while scanning for a string/expression
MSG_INFO(10) << "Found opening bracket. Bracket level: " << state->bracketlevel
<< ", STRING: '" << state->argument << "'" << endline;
state->bracketlevel++; // *** bracketlevel replaces fbracketstack in 0.8
state->argument += yytext;
}
<SCANSTR>{RBSIZE}{BCLOSE}|{BCLOSE} { // Closing brackets while scanning for a string
MSG_INFO(10) << "Closing bracket. Bracket level: " << state->bracketlevel
<< ", STRING: '" << state->argument << "'" << endline;
if (state->bracketlevel == 1) { // The string is finished
MSG_INFO(10) << "Bracket is final closing bracket." << endline;
// The bracket is pushed back and the text copied before exit_state() deletes this frame
yyless(0);
yylval.str = new std::string(state->argument);
exit_state();
return (STRING); // Rescan the bracket in keyword mode
} else { // Just a closing bracket inside the string
state->bracketlevel--; // *** moved bracket checking to the parser in 0.8
state->argument += yytext;
}
}
<SCANSTR>\\% state->argument += yytext; // *** made this a separate rule in 0.8
<SCANSTR>%.*\n state->argument += yytext; linenumber++;
<SCANSTR>. { // *** changed and moved this behind the comment rule in 0.8
// removed check for closing bracket in 0.8 because it can never occur here!
// TODO: How can this be made more efficient? [^{}()[\]\\]* does not work!
state->argument += yytext;
}
%{
// =================== state SCANQSTR (scan_qstr) ====================
// Scan a string between quotes ("")
// Enter: From scan_ex or proc_kw when a " is encountered
// Exit: When the next " is found (returns QSTRING)
// The returned string contains neither the opening nor the closing quote; an escaped quote
// is stored together with its backslash
%}
<SCANQSTR>\\\" state->argument += yytext;
<SCANQSTR>\" { /* Scan up to the closing \" of the equation label */
// Copy the text before exit_state() deletes the frame that holds it
yylval.str = new std::string(state->argument);
MSG_INFO(10) << "Found QSTRING '" << *yylval.str << "'" << endline;
exit_state();
return (QSTRING);
}
<SCANQSTR>\n { msg::error(0) << "Error: Newline in string constant. Discarding." << endline;
// Only the newline itself is dropped; scanning of the string continues
linenumber++;
}
<SCANQSTR>. state->argument += yytext; // TODO: make this more efficient
%{
// =================== state SCANEX (scan_ex) ====================================
// ------------------------------------------------------
// Special EQC keywords that may occur in expressions
// *** handled in keyword mode since 0.8
// ------------------------------------------------------
// Latex macros
// *** \frac since 0.8 handled in keyword mode to avoid problems with implicit muls between the two arguments
// *** \lhs and \rhs since 0.8 handled in keyword mode
%}
<SCANEX>\\frac|\\tfrac|\\dfrac { // *** added \tfrac and \dfrac in 0.8
// If an implicit multiplication is pending, IMPMUL is returned first and the macro is
// rescanned on the next call (check_implicit_mul pushes it back)
if (check_implicit_mul(NAME)) return (IMPMUL);
state->implicit_mul_might_follow = true;
// The arguments are scanned in keyword mode
enter_state(keyword);
yylval.str = new std::string(yytext); // *** added in 0.9 for the ltxrepr
return (FRAC);
}
<SCANEX>\\over state->implicit_mul_might_follow = false; return (OVER);
<SCANEX>\\unit { // *** changed in 1.4.3 for the ltxrepr
// Same pattern as the fraction rule: possible IMPMUL first, then keyword mode
if (check_implicit_mul(NAME)) return (IMPMUL);
state->implicit_mul_might_follow = true;
enter_state(keyword);
yylval.str = new std::string(yytext);
return (KWUNIT);
}
<SCANEX>\\ensuremath state->implicit_mul_might_follow = false; /* eat this. TODO: what about the ltxrepr? */
<SCANEX>\\usk /* eat this. TODO: what about the ltxrepr?*/
<SCANEX>\\cdot {
// An explicit multiplication sign is reported with the same token as an implicit one
state->implicit_mul_might_follow = false;
yylval.str = new std::string("\\cdot "); // *** added in 1.0
return (IMPMUL); /* Treat as multiplication symbol */
}
<SCANEX>\\per state->implicit_mul_might_follow = false; return ('/'); /* treat as division symbol. TODO: what about the ltxrepr? */
/* *** handling of other macros moved down to symbol handling in 0.5 */
%{
// ------------------------------------------------------
// normal symbols, unit or function names, incomplete subscripts
%}
<SCANEX>{ULETTER}{SUBSCRIPT}|{ULETTER}|\\{NAME}{SUBSCRIPT}|\\{NAME}\ {SUBSCRIPT}|\\{ULETTER}+ {
    // Classifies a single letter or a macro name (each optionally followed by a subscript).
    // Returns IMPMUL if an implicit multiplication is pending, otherwise UNIT, FUNC,
    // INTEGRAL, NAME or SYMBOL. EQC statement macros return no token here: they are pushed
    // back and rescanned in keyword mode.
    // Note: This means that f \rhs{"prev"} does not mean f * rhs{"prev"}, but f(\rhs{"prev"})
    MSG_INFO(9) << "Found text: " << std::string(yytext) << endline;
    if (check_implicit_mul(NAME)) return (IMPMUL); // *** changed '*' to IMPMUL in 0.5
    yylval.str = new std::string(yytext);
    remove_space(*yylval.str); // *** added in 1.0
    state->implicit_mul_might_follow = true;
    if (Unit::is_unitname(*yylval.str)) { // The name is a unit
        state->last_token = UNIT;
        if ((*yylval.str)[0] == '\\')
            yylval.str->erase(0,1); // TODO: allow unit names which are not macros?
        return (UNIT);
    }
    if (func::is_a_func(*yylval.str)) { // The name is a function
        MSG_INFO(9) << "Found function " << *(yylval.str) << endline;
        // Note that the function might or might not be followed by an argument!
        // The case FUNC BOPEN is handled in check_implicit_mul
        state->last_token = FUNC;
        return (FUNC);
    }
    if (*yylval.str == "\\integral") { // *** added in 1.3.1
        MSG_INFO(1) << "Found integral" << endline;
        state->last_token = FUNC;
        return INTEGRAL;
    }
    if (set_has(eq_statement, yytext)) { // The name is an equation statement
        MSG_INFO(10) << "Found equation statement " << yytext << endline;
        // No token is returned from here, so nobody would ever receive the copy made above:
        // release it instead of leaking it
        delete yylval.str;
        yylval.str = 0;
        yyless(0); // Rescan the macro as a keyword
        enter_state(keyword);
    } else if (set_has(ex_statement, yytext)) { // The name is an expression statement
        MSG_INFO(10) << "Found expression statement " << yytext << endline;
        delete yylval.str; // see above: the copy is not handed to the parser
        yylval.str = 0;
        yyless(0); // Rescan the macro as a keyword
        enter_state(keyword);
    } else { // A (possibly incomplete) variable name
        const std::string::size_type pos = yylval.str->find("_");
        if (pos == std::string::npos) {
            // The symbol has no subscript yet, so we assume that a subscript might follow later
            state->last_token = NAME;
        } else if (state->last_token == '^') {
            // The subscript does not belong to this symbol. Rescan it.
            yylval.str->erase(pos);
            yyless(static_cast<int>(pos));
            state->last_token = NAME;
        } else {
            state->last_token = SYMBOL;
        }
        MSG_INFO(9) << "Found " << ((state->last_token == NAME) ? "name: " : "symbol ")
                    << *yylval.str << endline;
        return (state->last_token);
    }
}
<SCANEX>{SUBSCRIPT} { // *** added in 0.5
// A subscript that was not scanned together with a name by the preceding rule.
// Note that no implicit multiplication check is made here.
yylval.str = new std::string(yytext);
state->implicit_mul_might_follow = true;
state->last_token = SUBSCRIPT;
MSG_INFO(9) << "Found subscript " << *yylval.str << endline;
return (SUBSCRIPT);
}
%{
// ------------------------------------------------------
// Numbers and labels
// TODO: what about \\\" inside a label?
// Digits are returned one at a time. check_implicit_mul() never reports a multiplication
// between two directly adjacent digits; the whitespace rules reset last_token after a digit
// so that '5 1' is not read as '51'.
%}
<SCANEX>{DIGIT} { if (check_implicit_mul(DIGIT)) return (IMPMUL); // *** changed '*' to IMPMUL in 0.5
state->implicit_mul_might_follow = true;
state->last_token = DIGIT;
yylval.str = new std::string(yytext);
return (DIGIT);
}
<SCANEX>\" { enter_state(scan_qstr); }
%{
// ------------------------------------------------------
// Brackets
// Added the space between LBSIZE and BOPEN in 1.0
%}
<SCANEX>\{\} { // Treat as whitespace *** added in 1.0
if (state->last_token == DIGIT) // *** added in 0.9 for cases like 5 1/\s
state->last_token = ' ';
}
<SCANEX>{LBSIZE}\ {BOPEN}|{LBSIZE}{BOPEN}|{BOPEN} { /* Opening brackets */
// TODO: Outer brackets around equations are not recognized for the ltxrepr of an ex!
// e.g. {x = 5} would set the ltxrepr of x to '{x '
if (check_implicit_mul(BOPEN)) return (IMPMUL); // *** changed '*' to IMPMUL in 0.5
state->implicit_mul_might_follow = false; // *** added in 0.5
state->last_token = BOPEN; // *** added in 0.5
// The bracket text (size modifier included, blank removed) goes to the parser
yylval.str = new std::string(yytext);
remove_space(*yylval.str); // *** added in 1.0
state->bracketlevel++; // *** bracketlevel replaces fbracketstack in 0.8
MSG_INFO(10) << "Found bracket: " << yytext << ", bracket level is "
<< state->bracketlevel << endline;
// Only a match that starts with a plain '{' is returned as the character itself
if (yytext[0] == '{')
return (yytext[0]); // This bracket is treated separately because it is used for Latex groupings
else
return (BOPEN);
}
<SCANEX>{RBSIZE}\ {BCLOSE}|{RBSIZE}{BCLOSE}|{BCLOSE} { /* Closing brackets */
state->implicit_mul_might_follow = true;
state->last_token = BCLOSE;
state->bracketlevel--; // *** bracketlevel replaces fbracketstack in 0.8
MSG_INFO(10) << "Found bracket: " << yytext << ", bracket level is "
<< state->bracketlevel << endline;
// bracketlevel starts at 1 in this state (see enter_state), so level 0 means that this
// bracket closes the expression: no token is returned and the enclosing state rescans it
if (state->bracketlevel == 0) {
yyless(0); // rescan the bracket in keyword mode
exit_state();
} else {
yylval.str = new std::string(yytext);
remove_space(*yylval.str); // *** added in 1.0
// Only a match that starts with a plain '}' is returned as the character itself
if (yytext[0] == '}')
return (yytext[0]);
else
return (BCLOSE);
}
}
%{
// ------------------------------------------------------
// One-letter tokens and the rest
// The whitespace rules return no token. They reset last_token after a digit so that the
// next digit is not treated as a continuation of the same number.
%}
<SCANEX>\%.*\n linenumber++; /* Eat comments */
<SCANEX>\n { // Eat whitespace
if (state->last_token == DIGIT) // *** added in 0.9 for cases like 5 1/\s
state->last_token = ' ';
linenumber++;
}
<SCANEX>[ \t]+ { // Eat whitespace
if (state->last_token == DIGIT) // *** added in 0.9 for cases like 5 1/\s
state->last_token = ' ';
}
<SCANEX>\\, { // Treat as whitespace *** added in 1.0 TODO: What about the latex representation?
if (state->last_token == DIGIT) // *** added in 0.9 for cases like 5 1/\s
state->last_token = ' ';
}
<SCANEX>. { /* everything else */
// Any other single character is returned as its own token
MSG_INFO(9) << "Found token " << yytext << endline;
state->implicit_mul_might_follow = false; // Are there any exceptions?
// register the last token for correct parsing of x^y_n -> (x_n)^y, not x^(y_n)
state->last_token = *yytext; // *** added in 0.5
return (*yytext);
}
%%
std::stack<YY_BUFFER_STATE> inputfiles; // for keeping track of input buffers
// ------------ Routines for interfacing with the parser -------------
// Opens fname (first as given, then relative to the o_path option) and makes it the current
// input of the scanner. If a file is already being scanned, its buffer is saved on inputfiles
// so that pop_input_buffer() can return to it.
// Returns false, with the previous input left in place, if the file cannot be opened.
bool new_input_buffer (const std::string &fname) {
    //TODO: Use push_buffer!!!!
    FILE *yyin_old = yyin; //*** introduced this variable in 0.5 because the old method was buggy
    const bool nested = (yyin_old != 0); // This is not the first buffer opened
    if (nested)
        inputfiles.push(YY_CURRENT_BUFFER);

    yyin = fopen(fname.c_str(), "r");
    if (yyin == NULL) // *** Added search in TEXPATH in version 0.2, changed in 1.1
        yyin = fopen((*(optstack::options->get(o_path)).str+fname).c_str(), "r");

    if (yyin == NULL) { //*** changed in 0.5 because of a bug
        yyin = yyin_old;
        // Undo the push from above. Nothing was pushed for the very first file, and popping
        // an empty stack is undefined behaviour.
        if (nested)
            inputfiles.pop();
        return (false);
    }
    yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
    return (true);
} // new_input_buffer()
bool pop_input_buffer() {
yy_delete_buffer(YY_CURRENT_BUFFER);
fclose(yyin);
if (!inputfiles.empty()) {
yy_switch_to_buffer(inputfiles.top());
inputfiles.pop();
return (true);
} else
return (false);
} // pop_input_buffer()
// ----------------- Routines to handle start conditions -------------------
void enter_state(const scanmode m) { // *** added in 0.6, changed in 0.8
if (!states.empty())
msg::info(10) << "Argument is " << state->argument << endline;
state = new staterec;
switch (m) {
case keyword: { state->start = PROC_KW; break; }
case verb: { state->start = VERB; break; }
case verbatim: { state->start = VERBATIM; break; }
case scan_str: {
state->start = SCANSTR;
state->bracketlevel = 1; // *** bracketlevel replaces fbracketstack in 0.8
break;
}
case scan_ex: {
state->start = SCANEX;
state->bracketlevel = 1; // *** bracketlevel replaces fbracketstack in 0.8
break;
}
case scan_stm: { state->start = INITIAL; break; }
case scan_qstr: { state->start = SCANQSTR; break; }
default: {
state->start = INITIAL; // *** added in 1.0
throw std::runtime_error("Internal error: Start condition does not exist");
}
}
states.push(state);
BEGIN(state->start);
MSG_INFO(9) << "Beginning state " << state->start << endline;
} // enter_state()
// Leaves the current scanning state: removes and deletes the top frame of the state stack and
// makes the frame below it current. When the last frame is removed, the current state pointer
// is reset to null (it used to keep pointing at the deleted frame) and the scanner returns to
// INITIAL. Calling this with an empty stack only reports an internal error.
void exit_state() { // *** added in 0.8
    if (states.empty()) {
        msg::error(0) << "Internal error: State stack is empty while parsing file." << endline;
    } else {
        states.pop();
        delete state;
        if (states.empty())
            state = 0;
        else
            state = states.top();
    }
    BEGIN((states.empty() ? INITIAL : state->start));
    MSG_INFO(9) << "Exiting to state " << YY_START << endline;
    if (!states.empty())
        msg::info(10) << "Argument is " << state->argument << endline;
} // exit_state()
void clear_state() { // *** added in 0.8
if (!states.empty())
msg::warn(0) << "Warning: State stack has " << states.size() << " frames left." << endline;
while (!states.empty()) states.pop(); // TODO: Is there a method for this in the class?
BEGIN(INITIAL);
} // clear_state()
// Translates the scanner's current start condition into the corresponding scanmode value.
// An unknown start condition is reported as an internal error and mapped to scan_stm.
const scanmode get_state() { // *** added in 0.8
    MSG_INFO(10) << "get_state: " << YY_START << endline;
    const int condition = YY_START;
    if (condition == VERBATIM) return verbatim;
    if (condition == INITIAL)  return scan_stm;
    if (condition == PROC_KW)  return keyword;
    if (condition == VERB)     return verb;
    if (condition == SCANSTR)  return scan_str;
    if (condition == SCANEX)   return scan_ex;
    if (condition == SCANQSTR) return scan_qstr;
    msg::error(0) << "Internal error: Unknown scanmode while parsing file." << endline;
    return scan_stm;
} // get_state()
// ----------------- Helper functions ----------------
// *** removed checksign in 0.8
// Decides whether an implicit multiplication has to be reported before the token that was
// just scanned. If so, the scanned text is pushed back, the implicit-mul bookkeeping of the
// current state is reset, yylval.str is set to " " and true is returned (the caller then
// returns IMPMUL). Otherwise nothing is changed and false is returned.
// No implicit multiplication is reported between two digits, or between a function name and
// a following opening bracket or name. // *** added FUNC cases in 0.5
bool check_implicit_mul(const int token) {
    if (!state->implicit_mul_might_follow)
        return false;

    const int previous = state->last_token;
    const bool inside_number = (previous == DIGIT) && (token == DIGIT);
    const bool function_argument = (previous == FUNC) && ((token == BOPEN) || (token == NAME));
    if (inside_number || function_argument)
        return false; // No implicit mul may exist between these two tokens

    state->implicit_mul_might_follow = false;
    state->last_token = ' ';
    yyless(0);
    yylval.str = new std::string(" "); // *** added in 1.0
    return (true);
} // check_implicit_mul()
// Overrides the implicit-multiplication flag of the current state frame.
// The current state pointer is dereferenced without a check, so a frame must be active.
void set_implicit_mul(const bool how) { // *** added in 0.8
state->implicit_mul_might_follow = how;
}
// Sets the asterisk_statement flag of the current state frame. The INITIAL macro rule reads
// this flag when it decides whether to drop a leading newline of the following text.
void set_asterisk_statement(const bool how) { // *** added in 1.4.3
state->asterisk_statement = how;
MSG_INFO(10) << "Set asterisk statement to " << (how ? "true" : "false") << endline;
}
// Returns the asterisk_statement flag of the current state frame
const bool get_asterisk_statement() { // *** added in 1.4.3
return state->asterisk_statement;
}
// Erases every blank (' ') from what, in place. Other whitespace such as tabs is kept.
// *** added in 1.0
void remove_space(std::string &what) { // Erase space inside names
    // Continue searching at the position of the erased blank instead of rescanning from the
    // start, and compare against npos instead of squeezing the result into an unsigned int
    std::string::size_type pos = 0;
    while ((pos = what.find(' ', pos)) != std::string::npos)
        what.erase(pos, 1);
}