Latex Equation Compiler Code

Brought to you by: jrheinlaender
[r11]: / src / ltxfilelex.ll Maximize Restore History
765 lines (722 with data), 36.7 kB

/***************************************************************************
    ltxfilelex.ll  -  rules for reading EQC commands from a ltx file
    - scanner generation file for flex
                             -------------------
    begin                : Sun Oct 21 2001
    copyright            : (C) 2001 by Jan Rheinlaender
    email                : jrheinlaender@users.sourceforge.net
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

%{
  #include "ltxfilelex.h"
  // The following includes are necessary for ltxfileparse.h
  #include "equation.h"
  #include "ltxfileparse.h"
  #include <stdio.h>
  #include "unit.h"
  #include "func.h" // *** added in 0.5
  #include "msgdriver.h" // *** added in 1.3.1
  #include "utils.h" // *** added in 0.8
  #include "optstack.h" // *** added in 1.2

  // This can also be done by calling flex with --nounistd *** added in 1.4.1
#ifdef _MSC_VER
  #define YY_NO_UNISTD_H
#endif

  // Text of the line being scanned, meant for locating error positions.
  // *** added in 0.8, TODO: Unused!!!
  std::string scanline = "";
  // Current input line number, starting at 1. The scanner rules increment it when they
  // consume a newline.
  int linenumber = 1; // for counting the lines
  // Delimiter character of the \verb command that is currently being copied. It is set by the
  // \verb rule and compared against every character read in state VERB. *** added in 0.4
  char verbatim_delimiter;
  // *** Moved definition of eqcpath to compiler in 1.1
  // List of keywords that create equations and expressions
  // *** added \eqeval, \eqevalp and \tseries in 1.0
  // *** removed floor and ceil in 1.2
  // The symbol rule of state SCANEX looks the scanned macro (the raw yytext, including the
  // leading backslash) up in these sets. On a hit the macro is rescanned in keyword mode.
  // Every entry must therefore start with a backslash.
  const char *eqstm[13] = {"\\eq","\\eqadd","\\eqsub","\\eqmul","\\eqdiv","\\eqpow","\\eqfunc",
                           "\\eqdiff","\\eqsubst","\\eqsubstc", "\\eqrev","\\eqsimpf",
                           "\\eqsolve"};
  std::set<const char*, ltstr> eq_statement(eqstm, eqstm+13);
  // Fixed: "\\numvalwith" was spelled without its backslash, so \numvalwith was never found
  // in this set.
  const char *exstm[15] = {"\\lhs","\\rhs","\\val","\\valwith","\\quantity","\\quantitywith",
                           "\\units","\\unitswith","\\numval","\\numvalwith",
                           "\\numer","\\denom", "\\eqeval", "\\eqevalp", "\\tseries"};
  std::set<const char*, ltstr> ex_statement(exstm, exstm+15);

  // Stack for keeping track of scanning states. state.top()->start always is the current
  // start condition
  struct staterec {
    // The flex start condition. Not initialised by the constructor; enter_state() assigns it.
    int start;
    // Text collected so far in this state *** added in 0.6
    std::string argument;
    // Nesting depth of brackets *** replaces fbracketstack in 0.8
    int bracketlevel;
    // Could an implicit multiplication follow after this token?
    bool implicit_mul_might_follow;
    // Avoid producing empty lines, e.g. \eq*{x=3} on a line by itself would do that
    // *** added in 1.4.3
    bool last_was_newline;
    // *** added in 1.4.3
    bool asterisk_statement;
    // Stores the last token in expression scanning mode
    int last_token;

    // Create a record with empty text, bracket level zero, all flags cleared and a blank
    // as the last token.
    staterec()
      : argument(""),
        bracketlevel(0),
        implicit_mul_might_follow(false),
        last_was_newline(false),
        asterisk_statement(false),
        last_token(' ') {}
  };
  // Stack of scanning state records (see struct staterec)
  std::stack<staterec*> states;
  // Contains the current state to avoid using states.top() all the time. enter_state()
  // points it at a freshly allocated record.
  staterec* state;
  // Note: An implicit multiplication occurs when the multiplication sign '*' is omitted, i.e.
  // '3x+4y' has two implicit multiplications. Parsing this is difficult because adding a rule
  // like exp exp -> exp leads to numerous shift/reduce conflicts. The way to avoid it is to add
  // some ugly hacks to the scanner. The logic behind these hacks is:
  // An implicit multiplication might follow the tokens SYMBOL, DIGIT, UNIT, MACRO or any kind of closing bracket
  // It will only follow if the next token is SYMBOL, MACRO, DIGIT, UNIT, FUNC, FRAC, VALUE or any kind
  // of opening bracket.
  // There are some exceptions:
  // - No implicit mul may occur between a function name and an opening bracket. Note that this
  //   case is ambiguous! EQC assumes that any opening bracket after a function name belongs to
  //   the function
  // In case an implicit mul is detected, the scanned token is put back on the input stream and IMPMUL returned instead.
  // *** corrected bug which parsed 230 1/\s as 2301/s in 0.9

  // Forward declaration of helper functions
  bool check_implicit_mul(const int token);
  void remove_space(std::string &what);
%}

%x PROC_KW VERB VERBATIM SCANSTR SCANQSTR SCANEX

%option noyywrap
%option never-interactive 
/* added in 1.4.1 for MSVC compile */

/* Unicode, see
   http://groups.google.com/group/comp.compilers/browse_thread/thread/2d2bff9c324d8e05 
   added in 1.4.3 */
UTF8_BYTE_ORDER_MARK   [\xEF][\xBB][\xBF] 

/* Note: despite its name, ASCII matches every byte value from 0x00 to 0xFF */
ASCII	    [\x0-\xFF]
/* U2 to U9: byte patterns of multi-byte UTF-8 sequences (U2 two bytes, U3-U6 three bytes,
   U7-U9 four bytes). The ranges appear to follow the table of well-formed UTF-8 sequences. */
U2          [\xC2-\xDF][\x80-\xBF]
U3          [\xE0][\xA0-\xBF][\x80-\xBF]
U4          [\xE1-\xEC][\x80-\xBF][\x80-\xBF]
U5          [\xED][\x80-\x9F][\x80-\xBF]
U6          [\xEE-\xEF][\x80-\xBF][\x80-\xBF]
U7          [\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF]
U8          [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
U9          [\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF]

UNICODE     {ASCII}|{U2}|{U3}|{U4}|{U5}|{U6}|{U7}|{U8}|{U9}
/* LASCII takes all byte values, subtracts the listed POSIX character classes with {-} and
   finally adds letters and the percent sign back with {+}.
   Adding [a-zA-Z\%] at the end might seem superfluous, but it isn't: letters belong to
   [:graph:] and [:print:] and the percent sign to [:punct:], so the subtractions before
   have removed them from the set. */
LASCII      [\x0-\xFF]{-}[[:blank:]]{-}[[:cntrl:]]{-}[[:digit:]]{-}[[:graph:]]{-}[[:print:]]{-}[[:punct:]]{-}[[:space:]]{+}[a-zA-Z\%]
LUNICODE    {LASCII}|{U2}|{U3}|{U4}|{U5}|{U6}|{U7}|{U8}|{U9}

/* For scanning expressions */
LETTER  [a-zA-Z]|\%
ULETTER {LUNICODE}
DIGIT   [0-9]
NAME    {ULETTER}+
/* Three forms of subscript: _{...}, _ plus a single letter, _ plus a single digit */
SUBSCRIPT _\{[^}]*\}|_{ULETTER}|_{DIGIT}
/* A subscript is enclosed in {} and can contain everything except closing brackets.
   TODO: This definition should correspond with what Latex allows in a subscript!!! */

/* For scanning keywords */
SEPARATOR [*"=&;{[}\]]

/* *** added  bracket definitions in 0.6 (copied from lteqlex.ll) */
/* LBSIZE and RBSIZE are the Latex size prefixes that may precede an opening or a closing
   bracket, BOPEN and BCLOSE are the brackets themselves.
   Fixed: RBSIZE used to read \\bigr\||Bigr, which matched the literal text "\bigr|" and a
   bare "Bigr" instead of the macros \bigr and \Bigr. */
LBSIZE    \\left|\\bigl|\\Bigl|\\biggl|\\Biggl
RBSIZE    \\right|\\bigr|\\Bigr|\\biggr|\\Biggr
BOPEN     [\{\(\[\|]|\\\{|\\lbrace|\\lbrack|\\lfloor|\\lceil|\\langle
BCLOSE    [\}\)\]\|]|\\\}|\\rbrace|\\rbrack|\\rfloor|\\rceil|\\rangle

%%
  // Code placed before the first rule is executed each time the scanning routine is entered:
  // re-establish the start condition stored in the current state record, or INITIAL as long
  // as the state stack is empty.
  BEGIN((states.empty() ? INITIAL : state->start));  // *** added in 0.8
  MSG_INFO(10) << "Start scanning in state:  " << YY_START << endline;
  if (YY_START == SCANEX) {
    MSG_INFO(10) << "Implicit mul might follow: "
      << (state->implicit_mul_might_follow ? "true" : "false") << endline;
  }

{UTF8_BYTE_ORDER_MARK}  state->argument += yytext; // a byte order mark is collected like ordinary text

%{
// =================== state INITIAL (scan_stm) =========================
// Scanning up to the  next statement
// Enter: Once right at the beginning of the input file
// Exit: At the end of the file
// Nesting: Enters state keyword when a macro is found. The macro is put back to be re-scanned.
//            Returns STRING if any text was accumulated in state->argument
//            Else goes on scanning
// Returns: ENDFILE when EOF is found. This either finishes scanning or goes on scanning the
//            file which included this one
// *** added in 0.6, moved to top and changed from SCANSTM to INITIAL in 0.8
%}
\\[[:alpha:]]+ {
            // A macro begins here. Push it back so that it is scanned again in keyword mode.
            yyless(0);
            if (state->argument == "") {
              // No text has been collected: just go on scanning in keyword mode
              enter_state(keyword);
            } else {
              // There is some text to be output first
              std::string &pending = state->argument;
              const bool newline_after_asterisk =
                state->last_was_newline && state->asterisk_statement;

              if (newline_after_asterisk && (pending[0] == '\n')) {
                MSG_INFO(10) << "Skipping double newline" << endline;
                pending.erase(0,1);
                state->last_was_newline = true; // This is still true!
              } else if (newline_after_asterisk && (pending.length() > 1)
                         && (pending[0] == '%') && (pending[1] == '\n')) {
                MSG_INFO(10) << "Skipping double commented newline" << endline;
                pending.erase(0,2);
                state->last_was_newline = true;
              } else if (pending[pending.length() - 1] == '\n') { // *** added in 1.4.3
                MSG_INFO(10) << "Found ending newline" << endline;
                state->last_was_newline = true;
              } else {
                state->last_was_newline = false;
              }
              // Whatever the text looked like, the asterisk statement has been dealt with
              state->asterisk_statement = false;

              yylval.str = new std::string(pending);
              MSG_INFO(10) << "Found STRING '" << *yylval.str << "'" << endline;
              pending = ""; // Clear argument for continued scanning after the keyword!
              // From here on 'state' refers to the new keyword state
              enter_state(keyword); // *** changed from BEGIN(INITIAL) in 0.8
              return (STRING); // *** changed ARGUMENT to STRING in 0.8
            }
          }
\\|\\%      state->argument += yytext; // handle lone backslashes and escaped percent signs
\n          state->argument += yytext; linenumber++; // newlines are collected and counted
[^\\\n\%]*  state->argument += yytext; // run of text without backslash, newline or percent sign
^\%.*\n     {
              // A comment starting at the beginning of a line. It is only copied to the
              // collected text while the option o_comments (set by 'cleancomments') is false.
	      if (!(optstack::options->get(o_comments).boolean)) 
                state->argument += yytext; // Otherwise clean comment lines from output file
              linenumber++;
            }
\%.*\n	  state->argument += yytext; linenumber++; // any other comment is always kept
<<EOF>>   { // *** removed BEGIN(INITIAL) in 0.8
            // This rule has no start condition. According to the flex manual it therefore
            // applies to every start condition that has no <<EOF>> action of its own.
            // The text collected so far is handed to the parser together with ENDFILE.
            state->argument += yytext; // TODO: Is this necessary?
            yylval.str = new std::string(state->argument);
            return(ENDFILE);
          }
%{
// =================== state PROC_KW (keyword) =======================
// Enter: When scan_stm finds a macro
// Exit:  When the parser finishes with the statement including all arguments
// Nesting: To scan_str for RENEWCOMMAND or scan_ex, depending on the statement
//            Note: scan_ex might change to keyword again, resulting in nesting of keywords
// Returns: The keyword when the macro is a keyword
//          MACRO if it isn't, then the parser exits to scan_stm immediately
//          MACRO which occurs inside arguments to a statement
//          WORD (pure text string) which occurs inside arguments
//          A separator if one is found
// Changes: Exits and enters verb when \verb is found
//          Exits and enters verbatim if the BEGIN statement finds a verbatim environment
// ------------------- Comments ----------------------------
// *** comment handling split up into the different states in 0.7
// *** changed from INITAL to PROC_KW in 0.8
%}
<PROC_KW>\%.*\n  { // Comments can always be eaten since all macros are returned directly
                   // to the compiler, so they can not accidentally concatenate with following text
                   linenumber++;
                 }
%{
// ------------------- Keywords ----------------------------
// *** removed token SPACE in 0.6
// ------------------- Keywords for statements that return an equation
// \eq, \eqrev, \eqsimpf and \eqsolve return a bare token. For EQOP and EQSUBST the name of
// the operation is passed to the parser as a newly allocated string in yylval.str.
%}
<PROC_KW>\\eq                   return(EQUATION);
<PROC_KW>\\eqadd                yylval.str = new std::string("add"); return(EQOP);
<PROC_KW>\\eqsub                yylval.str = new std::string("sub"); return(EQOP);
<PROC_KW>\\eqmul                yylval.str = new std::string("mul"); return(EQOP);
<PROC_KW>\\eqdiv                yylval.str = new std::string("div"); return(EQOP);
<PROC_KW>\\eqpow                yylval.str = new std::string("pow"); return(EQOP); // *** added in 0.6
<PROC_KW>\\eqfunc               yylval.str = new std::string("func"); return(EQOP); // *** added in 0.9
<PROC_KW>\\eqdiff               yylval.str = new std::string("diff"); return(EQOP); // *** added in 0.6
<PROC_KW>\\eqsubst              yylval.str = new std::string("subst"); return (EQSUBST); // *** changed in 0.9
<PROC_KW>\\eqsubstc             yylval.str = new std::string("substc"); return (EQSUBST); // *** added in 0.9
<PROC_KW>\\eqrev                return (EQREV);
<PROC_KW>\\eqsimpf              return (EQSIMPF); // *** added in 0.6
<PROC_KW>\\eqsolve              return(EQSOLVE); // *** added in 0.8

%{
// ------------------- Keywords for statements that return an expression
// *** added tokens ...with in 1.2, removed ceil and floor
// VALUE, VALUEWITH and LHSRHS carry the macro name in yylval.str. The ...with variants pass
// the same name as the plain macro and differ only in the token (VALUEWITH instead of VALUE).
// The remaining keywords return a bare token.
%}
<PROC_KW>\\val                  yylval.str = new std::string("\\val"); return(VALUE);
<PROC_KW>\\quantity             yylval.str = new std::string("\\quantity"); return(VALUE);
<PROC_KW>\\units                yylval.str = new std::string("\\units"); return(VALUE);
<PROC_KW>\\numval               yylval.str = new std::string("\\numval"); return(VALUE);
<PROC_KW>\\valwith              yylval.str = new std::string("\\val"); return(VALUEWITH);
<PROC_KW>\\quantitywith         yylval.str = new std::string("\\quantity"); return(VALUEWITH);
<PROC_KW>\\unitswith            yylval.str = new std::string("\\units"); return(VALUEWITH);
<PROC_KW>\\numvalwith           yylval.str = new std::string("\\numval"); return(VALUEWITH);
<PROC_KW>\\lhs                  yylval.str = new std::string("\\lhs"); return(LHSRHS);
<PROC_KW>\\rhs                  yylval.str = new std::string("\\rhs"); return(LHSRHS);
<PROC_KW>\\numer                return(NUMER); // *** numer and denom added in 0.8
<PROC_KW>\\denom                return(DENOM);
<PROC_KW>\\eqeval               return(EQEVAL); // *** added in 1.0
<PROC_KW>\\eqevalp              return(EQEVALP); // *** added in 1.0
<PROC_KW>\\tseries              return(TSERIES); // *** added in 1.0
%{
// ------------------- Other keywords
// *** moved preferredunits, precision, precision_type, scientific_limits, eqlang and eqpath to key-value lexing
// *** replaced printoptions by eqcoptions in 1.1
// All keywords in this group return a bare token without a semantic value.
%}
<PROC_KW>\\printeq              return (PRINT); // *** added in 0.6, removed return type str in 0.8
<PROC_KW>\\printvector          return (PRINTV); // *** added in 1.0
<PROC_KW>\\defunit              return (DEFUNIT);
<PROC_KW>\\deleq                return (DELETE); // *** added in 0.6
<PROC_KW>\\constant             return (CONSTANT);
<PROC_KW>\\function             return (FUNCTION);  // *** added in 0.5
<PROC_KW>\\matrix		return (MATRIX); // *** added in 1.4.3
<PROC_KW>\\deffunc              return (DEFFUNC);   // *** added in 0.5
<PROC_KW>\\eqcoptions           return (EQCOPTIONS); // *** added in 1.1
<PROC_KW>\\dumpeq               return (DUMP); // *** added in 0.8
<PROC_KW>\\usepackage           return (USEPACKAGE);
<PROC_KW>\\clearequations       return (CLEAREQUATIONS);
<PROC_KW>\\input                return (INPUT);
%{
// Environments
// *** added in 0.8
%}
<PROC_KW>\\begin                return (BEGINENV);
<PROC_KW>\\end                  return (ENDENV);
%{
// Latex commands that are ignored to avoid mistakenly parsing them *** added in 0.4
%}
<PROC_KW>\\renewcommand         return (RENEWCOMMAND); // *** added in 0.4 TODO: handle this with enter_state(scan_stm)?
%{
// This must be here because of the later catch-all rule for macros
// The '.' after \verb matches the delimiter character, which is remembered in
// verbatim_delimiter. The matched text is copied to the output unchanged.
%}
<PROC_KW>\\verb.      {
                        output.write (yytext, yyleng);
                        verbatim_delimiter = yytext[yyleng - 1]; // last matched character
                        exit_state(); // stop scanning this keyword
                        enter_state(verb); // *** changed to enter_state() in 0.8
                      }
%{
// ---------------------- Separators and text ------------------
// Note: SEPARATOR also contains the double quote, but the rule for \" comes first and both
// match a single character, so a double quote always starts a quoted string here.
%}
<PROC_KW>\"     { enter_state(scan_qstr); } // *** added in 1.2
<PROC_KW>{SEPARATOR} {
                   MSG_INFO(10) << "Found separator: " << yytext << endline;
                   return (yytext[0]); // the separator character itself is the token
                 }
%{
// Names for key-value pairs *** added in 1.1
// *** added difftype in 1.4.1
// These rules precede the WORD rule, so a bare word that is exactly one of these names is
// returned as an option token wherever it occurs in keyword mode. A longer word that merely
// starts with such a name is matched by the WORD rule (longest match).
// The option id is passed in yylval.opt. The token (OPT_L, OPT_E, OPT_B, OPT_S, OPT_A)
// presumably tells the parser what kind of value to expect - confirm in ltxfileparse.
%}
<PROC_KW>units          yylval.opt = o_units;        return(OPT_L);
<PROC_KW>precision      yylval.opt = o_precision;    return(OPT_E);
<PROC_KW>fixeddigits    yylval.opt = o_fixeddigits;  return(OPT_B);
<PROC_KW>lowsclimit     yylval.opt = o_lowsclimit;   return(OPT_E);
<PROC_KW>highsclimit    yylval.opt = o_highsclimit;  return(OPT_E);
<PROC_KW>lang           yylval.opt = o_lang;         return(OPT_S);
<PROC_KW>path           yylval.opt = o_path;         return(OPT_S);
<PROC_KW>eqparse        yylval.opt = o_eqparse;      return(OPT_B);
<PROC_KW>eqraw          yylval.opt = o_eqraw;        return(OPT_B);
<PROC_KW>eqchain        yylval.opt = o_eqchain;      return(OPT_B);
<PROC_KW>eqalign        yylval.opt = o_eqalign;      return(OPT_A);
<PROC_KW>eqginac        yylval.opt = o_eqginac;      return(OPT_B);
<PROC_KW>eqsplit        yylval.opt = o_eqsplit;      return(OPT_E);
<PROC_KW>eqsplittext    yylval.opt = o_eqsplittext;  return(OPT_S);
<PROC_KW>vecautosize    yylval.opt = o_vecautosize;  return(OPT_E);
<PROC_KW>difftype       yylval.opt = o_difftype;     return(OPT_S);
<PROC_KW>label          return(O_LABEL);
<PROC_KW>save           return(SAVE);
<PROC_KW>restore        return(RESTORE);
<PROC_KW>true           yylval.boolean = true;  return(BOOL);
<PROC_KW>false          yylval.boolean = false;  return(BOOL);
<PROC_KW>onlyleft       yylval.align = onlyleft; return(ALIGN); // *** added in 1.2
<PROC_KW>both           yylval.align = both; return(ALIGN);
<PROC_KW>none           yylval.align = none; return(ALIGN);
<PROC_KW>debug		yylval.opt = o_debug; return(OPT_E); // *** added in 1.4.3
<PROC_KW>cleancomments  yylval.opt = o_comments; return(OPT_B); // *** added in 1.4.3
<PROC_KW>tan_is_tg      yylval.opt = o_tan; return(OPT_B); // *** added in 1.4.3

%{
// Other words
// Catch-all rules of keyword mode: WORD for plain text, MACRO for macros that are not
// keywords, whitespace (eaten) and a lone backslash (returned as WORD).
%}
<PROC_KW>([^\\%*"=&;{[}\]\t\n ]|\\%)+ {
                   // A WORD can contain anything that is not a SEPARATOR, a MACRO,
                   // a comment or a space. Note that we have to make provision for
                   // escaped % signs
                   yylval.str = new std::string(yytext);
                   MSG_INFO(10) << "Found word: " << yytext << endline;
                   return (WORD);
                 }
<PROC_KW>\\[[:alpha:]]+ { // any other macro names
                   // The macro name is passed including the backslash
                   yylval.str = new std::string(yytext);
                   MSG_INFO(10) << "Found other macro " << yytext << endline;
                   return (MACRO);
                 }
<PROC_KW>[[:space:]]+ { // count the lines and eat the space. TODO: Use find()?
                   for (int i=0; i < yyleng; i++)
                     if (yytext[i] == '\n') linenumber++;
                 }
<PROC_KW>\\      { // any remaining backslashes TODO: Is this possible?
                   yylval.str = new std::string(yytext);
                   return (WORD);
                 }
%{
// =============== Verbatim environments (verb,verbatim) =================
// Enter: From keyword
// Exit:  When environment is finished
// *** added in 0.4
// *** moved recognition of \begin{verbatim} to PROC_KW in 0.8
// In both states the input is copied to the output unchanged.
// NOTE(review): <VERBATIM>.* matches the rest of the current line. Since flex prefers the
// longest match, \end{verbatim} seems to be recognised only if nothing else precedes or
// follows it on the same line - confirm.
%}
<VERB,VERBATIM>\n     { // *** changed order of this and following rule in 0.6
                        output << std::endl;
                        linenumber++;
                      }
<VERB>.               { output.put(yytext[0]);
                        // The state ends with the delimiter that followed \verb
                        if (yytext[0] == verbatim_delimiter) {
                          // *** changed INITAL to SCANSTM in 0.6, changed to exit_state() in 0.8
                          exit_state();
                        }
                      }
<VERBATIM>\\end[[:space:]]*\{[[:space:]]*verbatim[[:space:]]*\} { // TODO: comments inside the \end{...} are not possible
                        // *** changed INITAL to SCANSTM in 0.6, changed to exit_state() in 0.8
                        output.write(yytext, yyleng);
                        exit_state();
                      }
<VERBATIM>.*           output.write(yytext, yyleng);
%{
// =================== states SCANSTR (scan_str) ====================
// Enter: From RENEWCOMMAND keyword
// Exit:  When the final closing bracket is found (returns STRING)
// *** added in 0.6, major rewrite in 0.8, now largely replaced by SCANEX
// All text is collected in state->argument. The bracket level starts at 1 (see enter_state),
// so the closing bracket that is seen at level 1 ends the string.
%}
<SCANSTR>\n   state->argument += yytext; linenumber++;
<SCANSTR>{LBSIZE}{BOPEN}|{BOPEN} { // Opening brackets while scanning for a string/expression
              MSG_INFO(10) << "Found opening bracket. Bracket level: " << state->bracketlevel
                                 << ", STRING: '" << state->argument << "'" << endline;
              state->bracketlevel++; // *** bracketlevel replaces fbracketstack in 0.8
              state->argument += yytext;
             }
<SCANSTR>{RBSIZE}{BCLOSE}|{BCLOSE} { // Closing brackets while scanning for a string
              MSG_INFO(10) << "Closing bracket. Bracket level: " << state->bracketlevel
                                 << ", STRING: '" << state->argument << "'" << endline;

              if (state->bracketlevel == 1) { // The string is finished
                MSG_INFO(10) << "Bracket is final closing bracket." << endline;
                yyless(0); // the bracket itself is not part of the string
                yylval.str = new std::string(state->argument);
                exit_state();
                return (STRING); // Rescan the bracket in keyword mode
              } else { // Just a closing bracket inside the string
                state->bracketlevel--; // *** moved bracket checking to the parser in 0.8
                state->argument += yytext;
              }
            }
<SCANSTR>\\%   state->argument += yytext; // *** made this a separate rule in 0.8
<SCANSTR>%.*\n state->argument += yytext; linenumber++; // comments are kept in the string
<SCANSTR>. { // *** changed and moved this behind the comment rule in 0.8
             // removed check for closing bracket in 0.8 because it can never occur here!
             // TODO: How can this be made more efficient? [^{}()[\]\\]* does not work!
             state->argument += yytext;
            }
%{
// =================== state SCANQSTR (scan_qstr) ====================
// Scan a string between quotes ("")
// Enter: From scan_ex or proc_kw when a " is encountered
// Exit:  When the next " is found (returns QSTRING)
// The returned string contains neither the opening nor the closing quote. An escaped quote
// is stored as the two characters \" and a newline is dropped with an error message.
%}
<SCANQSTR>\\\" state->argument += yytext;
<SCANQSTR>\" { /* Scan up to the closing \" of the equation label */
              yylval.str = new std::string(state->argument);
              MSG_INFO(10) << "Found QSTRING '" << *yylval.str << "'" << endline;
              exit_state();
              return (QSTRING);
            }
<SCANQSTR>\n { msg::error(0) << "Error: Newline in string constant. Discarding." << endline;
              linenumber++; // the newline is not added to the string, scanning goes on
            }
<SCANQSTR>.  state->argument += yytext; // TODO: make this more efficient

%{
// =================== state SCANEX (scan_ex) ====================================
// ------------------------------------------------------
// Special EQC keywords that may occur in expressions
// *** handled in keyword mode since 0.8
// ------------------------------------------------------
// Latex macros
// *** \frac since 0.8 handled in keyword mode to avoid problems with implicit muls between the two arguments
// *** \lhs and \rhs since 0.8 handled in keyword mode
// For \frac and \unit the flag implicit_mul_might_follow is set on the SCANEX state before
// the keyword state is entered, i.e. it applies once scanning returns to this state.
%}
<SCANEX>\\frac|\\tfrac|\\dfrac { // *** added \tfrac and \dfrac in 0.8
                      if (check_implicit_mul(NAME)) return (IMPMUL);
                      state->implicit_mul_might_follow = true;
                      enter_state(keyword);
                      yylval.str = new std::string(yytext); // *** added in 0.9 for the ltxrepr
                      return (FRAC);
                    }
<SCANEX>\\over        state->implicit_mul_might_follow = false; return (OVER);
<SCANEX>\\unit      { // *** changed in 1.4.3 for the ltxrepr
                      if (check_implicit_mul(NAME)) return (IMPMUL);
                      state->implicit_mul_might_follow = true;
                      enter_state(keyword);
                      yylval.str = new std::string(yytext); 
                      return (KWUNIT);
                    }
<SCANEX>\\ensuremath  state->implicit_mul_might_follow = false; /* eat this. TODO: what about the ltxrepr? */
<SCANEX>\\usk         /* eat this. TODO: what about the ltxrepr?*/
<SCANEX>\\cdot      {
                      state->implicit_mul_might_follow = false;
                      yylval.str = new std::string("\\cdot "); // *** added in 1.0
                      return (IMPMUL); /* Treat as multiplication symbol */
                    }
<SCANEX>\\per         state->implicit_mul_might_follow = false; return ('/'); /* treat as division symbol. TODO: what about the ltxrepr? */
              /* *** handling of other macros moved down to symbol handling in 0.5 */
%{
// ------------------------------------------------------
// normal symbols, unit or function names, incomplete subscripts
%}
<SCANEX>{ULETTER}{SUBSCRIPT}|{ULETTER}|\\{NAME}{SUBSCRIPT}|\\{NAME}\ {SUBSCRIPT}|\\{ULETTER}+ {
         // Scans a single letter or a macro, optionally followed by a subscript, and classifies
         // it in this order: unit, function, \integral, equation/expression statement (rescanned
         // in keyword mode), otherwise a variable (NAME without subscript, SYMBOL with one).
         // Note: This means that f \rhs{"prev"} does not mean f * rhs{"prev"}, but f(\rhs{"prev"})
         MSG_INFO(9) << "Found text: " << std::string(yytext) << endline;
         if (check_implicit_mul(NAME)) return (IMPMUL); // *** changed '*' to IMPMUL in 0.5
         yylval.str = new std::string(yytext);
         remove_space(*yylval.str); // *** added in 1.0
         state->implicit_mul_might_follow = true;

         if (Unit::is_unitname(*yylval.str)) { // The name is a unit
           state->last_token = UNIT;
           if ((*yylval.str)[0] == '\\')
             yylval.str->erase(0,1); // TODO: allow unit names which are not macros?
           return (UNIT);
         }

         if (func::is_a_func(*yylval.str)) { // The name is a function
           MSG_INFO(9) << "Found function " << *(yylval.str) << endline;
           // Note that the function might or might not be followed by an argument!
           // The case FUNC BOPEN is handled in check_implicit_mul
           state->last_token = FUNC;
           return (FUNC);
         }

         if (*yylval.str == "\\integral") { // *** added in 1.3.1
           MSG_INFO(1) << "Found integral" << endline;
           state->last_token = FUNC;
           return INTEGRAL;
         }

         if (set_has(eq_statement, yytext)) { // The name is an equation statement
           MSG_INFO(10) << "Found equation statement " << yytext << endline;
           // No token is returned from here, so nobody takes over the string: release it
           delete yylval.str;
           yylval.str = NULL;
           yyless(0); // Rescan the macro as a keyword
           enter_state(keyword);
         } else if (set_has(ex_statement, yytext)) { // The name is an expression statement
           MSG_INFO(10) << "Found expression statement " << yytext << endline;
           // No token is returned from here, so nobody takes over the string: release it
           delete yylval.str;
           yylval.str = NULL;
           yyless(0); // Rescan the macro as a keyword
           enter_state(keyword);
         } else { // A (possibly incomplete) variable name
           // Compare against npos in the proper type instead of truncating to unsigned int
           const std::string::size_type pos = yylval.str->find('_');
           if (pos == std::string::npos) {
             // The symbol has no subscript yet, so we assume that a subscript might follow later
             state->last_token = NAME;
           } else if (state->last_token == '^') {
             // The subscript does not belong to this symbol. Rescan it.
             yylval.str->erase(pos);
             yyless(static_cast<int>(pos));
             state->last_token = NAME;
           } else {
             state->last_token = SYMBOL;
           }
           MSG_INFO(9) << "Found " << ((state->last_token == NAME) ? "name: " : "symbol ")
                             << *yylval.str << endline;
           return (state->last_token);
         }
       }
<SCANEX>{SUBSCRIPT} { // *** added in 0.5
         // A subscript on its own is passed on as token SUBSCRIPT with its text
         std::string *subscript = new std::string(yytext);
         state->last_token = SUBSCRIPT;
         state->implicit_mul_might_follow = true;
         yylval.str = subscript;
         MSG_INFO(9)  << "Found subscript " << *subscript << endline;
         return (SUBSCRIPT);
        }
%{
// ------------------------------------------------------
// Numbers and labels
// TODO: what about \\\" inside a label?
%}
<SCANEX>{DIGIT} { // *** changed '*' to IMPMUL in 0.5
          // The check for an implicit multiplication must come before anything is changed
          if (check_implicit_mul(DIGIT)) {
            return (IMPMUL);
          }
          // A single digit is passed on as text
          yylval.str = new std::string(yytext);
          state->last_token = DIGIT;
          state->implicit_mul_might_follow = true;
          return (DIGIT);
        }
<SCANEX>\" {
          // Opening quote: the string is collected in a state of its own
          enter_state(scan_qstr);
        }
%{
// ------------------------------------------------------
// Brackets
// Added the space between LBSIZE and BOPEN in 1.0
// The bracket level starts at 1 when the state is entered (see enter_state). The closing
// bracket that brings it down to 0 ends the expression: it is put back and the state is left.
// Curly brackets are returned as the characters '{' and '}', all others as BOPEN and BCLOSE.
%}
<SCANEX>\{\}   { // Treat as whitespace *** added in 1.0
           if (state->last_token == DIGIT) // *** added in 0.9 for cases like 5 1/\s
                   state->last_token = ' ';
               }
<SCANEX>{LBSIZE}\ {BOPEN}|{LBSIZE}{BOPEN}|{BOPEN} { /* Opening brackets */
              // TODO: Outer brackets around equations are not recognized for the ltxrepr of an ex!
              // e.g. {x = 5} would set the ltxrepr of x to '{x '
              if (check_implicit_mul(BOPEN)) return (IMPMUL); // *** changed '*' to IMPMUL in 0.5
              state->implicit_mul_might_follow = false; // *** added in 0.5
              state->last_token = BOPEN; // *** added in 0.5
              yylval.str = new std::string(yytext);
        remove_space(*yylval.str); // *** added in 1.0
        state->bracketlevel++; // *** bracketlevel replaces fbracketstack in 0.8
              MSG_INFO(10) << "Found bracket: " << yytext << ", bracket level is "
                                 << state->bracketlevel << endline;
              if (yytext[0] == '{')
                return (yytext[0]); // This bracket is treated separately because it is used for Latex groupings
              else
                return (BOPEN);
            }
<SCANEX>{RBSIZE}\ {BCLOSE}|{RBSIZE}{BCLOSE}|{BCLOSE} { /* Closing brackets */
              state->implicit_mul_might_follow = true;
              state->last_token = BCLOSE;
              state->bracketlevel--; // *** bracketlevel replaces fbracketstack in 0.8
              MSG_INFO(10) << "Found bracket: " << yytext << ", bracket level is "
                                 << state->bracketlevel << endline;
              if (state->bracketlevel == 0) {
                // This bracket closes the expression: no token is returned from here
                yyless(0); // rescan the bracket in keyword mode
                exit_state();
              } else {
                yylval.str = new std::string(yytext);
    remove_space(*yylval.str); // *** added in 1.0
                if (yytext[0] == '}')
                  return (yytext[0]);
                else
                  return (BCLOSE);
              }
            }
%{
// ------------------------------------------------------
// One-letter tokens and the rest
// Comments, whitespace and \, produce no token. After a DIGIT they reset last_token to a
// blank, which was added for cases like 5 1/\s.
%}
<SCANEX>\%.*\n   linenumber++; /* Eat comments */
<SCANEX>\n     { // Eat whitespace
                 if (state->last_token == DIGIT) // *** added in 0.9 for cases like 5 1/\s
                   state->last_token = ' ';
                 linenumber++;
               }
<SCANEX>[ \t]+ { // Eat whitespace
                 if (state->last_token == DIGIT) // *** added in 0.9 for cases like 5 1/\s
                   state->last_token = ' ';
               }
<SCANEX>\\,    { // Treat as whitespace *** added in 1.0 TODO: What about the latex representation?
           if (state->last_token == DIGIT) // *** added in 0.9 for cases like 5 1/\s
                   state->last_token = ' ';
               }
<SCANEX>.   { /* everything else */
              // Any other single character is returned as a token of its own
              MSG_INFO(9) << "Found token " << yytext << endline;
              state->implicit_mul_might_follow = false; // Are there any exceptions?
              // register the last token for correct parsing of x^y_n -> (x_n)^y, not x^(y_n)
              state->last_token = *yytext; // *** added in 0.5
              return (*yytext);
            }
%%

  std::stack<YY_BUFFER_STATE> inputfiles; // for keeping track of input buffers

// ------------ Routines for interfacing with the parser -------------
  // Open the file fname and make it the scanner's current input buffer.
  //
  // The file is first opened under the name given; if that fails, the name is
  // prefixed with the o_path option string and tried again (*** search path added
  // in version 0.2, changed in 1.1).
  //
  // On success the buffer that was active so far (if any) is saved on inputfiles,
  // so that pop_input_buffer() can return to it, and true is returned.
  // On failure nothing is changed (yyin and inputfiles keep their values) and
  // false is returned.
  bool new_input_buffer (const std::string &fname) {
    //TODO: Use push_buffer!!!!
    FILE *newfile = fopen(fname.c_str(), "r");
    if (newfile == NULL)
      newfile = fopen((*(optstack::options->get(o_path)).str+fname).c_str(), "r");
    if (newfile == NULL)
      return (false);

    // Only save the current buffer once the new file is known to be readable.
    // The previous code pushed first and popped unconditionally on failure, which
    // popped an empty stack when the very first file could not be opened.
    if (yyin != 0) // This is not the first buffer opened
      inputfiles.push(YY_CURRENT_BUFFER);

    yyin = newfile;
    yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
    return (true);
  } // new_input_buffer()

  // Discard the current input buffer, close its file and resume the buffer that
  // was suspended by the matching new_input_buffer() call.
  //
  // Returns true if a suspended buffer was resumed, false if no buffer is left.
  bool pop_input_buffer() {
    yy_delete_buffer(YY_CURRENT_BUFFER);
    if (yyin != NULL)
      fclose(yyin);

    if (inputfiles.empty()) {
      // Do not leave a pointer to the closed file behind: new_input_buffer()
      // takes a non-null yyin to mean that a buffer is still active.
      yyin = NULL;
      return (false);
    }

    // flex reloads yyin from the buffer that is switched to
    yy_switch_to_buffer(inputfiles.top());
    inputfiles.pop();
    return (true);
  } // pop_input_buffer()

// ----------------- Routines to handle start conditions -------------------
  // Push a new scanner state frame for scan mode m and switch flex to the
  // matching start condition. *** added in 0.6, changed in 0.8
  //
  // A fresh staterec is allocated, made the current `state` and pushed on `states`.
  // For scan_str and scan_ex the bracket level of the new frame starts at 1
  // (*** bracketlevel replaces fbracketstack in 0.8).
  //
  // Throws std::runtime_error for an unknown scan mode. Note that in this case the
  // new record has already been assigned to `state` (with start INITIAL,
  // *** added in 1.0) but is not pushed on the stack.
  void enter_state(const scanmode m) {
    if (!states.empty())
      msg::info(10) << "Argument is " << state->argument << endline;

    state = new staterec;
    switch (m) {
      case keyword:
        state->start = PROC_KW;
        break;
      case verb:
        state->start = VERB;
        break;
      case verbatim:
        state->start = VERBATIM;
        break;
      case scan_str:
        state->start = SCANSTR;
        state->bracketlevel = 1;
        break;
      case scan_ex:
        state->start = SCANEX;
        state->bracketlevel = 1;
        break;
      case scan_stm:
        state->start = INITIAL;
        break;
      case scan_qstr:
        state->start = SCANQSTR;
        break;
      default:
        state->start = INITIAL;
        throw std::runtime_error("Internal error: Start condition does not exist");
    }

    states.push(state);
    BEGIN(state->start);
    MSG_INFO(9) << "Beginning state " << state->start << endline;
  } // enter_state()

  // Leave the current scanner state: drop the top frame, delete the current
  // state record and resume the frame below it. *** added in 0.8
  //
  // If no frame remains, flex is switched to INITIAL and `state` is left pointing
  // at the record that was just deleted. Calling this with an empty stack only
  // reports an internal error.
  void exit_state() {
    if (states.empty()) {
      msg::error(0) << "Internal error: State stack is empty while parsing file." << endline;
    } else {
      states.pop();
      delete(state);
      if (!states.empty())
        state = states.top();
    }

    const bool no_frame_left = states.empty();
    BEGIN((no_frame_left ? INITIAL : state->start));
    MSG_INFO(9) << "Exiting to state " << YY_START << endline;
    if (!no_frame_left)
      msg::info(10) << "Argument is " << state->argument << endline;
  } // exit_state()

  // Throw away all scanner state frames and return flex to the INITIAL start
  // condition. *** added in 0.8
  //
  // A warning is issued if frames are still present. Every remaining frame was
  // allocated by enter_state() and is deleted here; before, the frames were only
  // popped and their records leaked.
  // `state` itself is not touched, just as exit_state() leaves it after removing
  // the last frame, so it must not be dereferenced before the next enter_state().
  void clear_state() {
    if (!states.empty())
      msg::warn(0) << "Warning: State stack has " << states.size() << " frames left." << endline;
    while (!states.empty()) {
      delete states.top();
      states.pop();
    }
    BEGIN(INITIAL);
  } // clear_state()

  // Translate the start condition flex is currently in (YY_START) into the
  // corresponding scanmode value. *** added in 0.8
  //
  // An unknown start condition is reported as an internal error and answered
  // with scan_stm.
  const scanmode get_state() {
    MSG_INFO(10) << "get_state: " << YY_START << endline;
    switch (YY_START) {
      case INITIAL:  return scan_stm;
      case PROC_KW:  return keyword;
      case VERB:     return verb;
      case VERBATIM: return verbatim;
      case SCANSTR:  return scan_str;
      case SCANEX:   return scan_ex;
      case SCANQSTR: return scan_qstr;
      default:       break;
    }

    msg::error(0) << "Internal error: Unknown scanmode while parsing file." << endline;
    return scan_stm;
  } // get_state()


  // ----------------- Helper functions ----------------
  // *** removed checksign in 0.8
  // Decide whether an implicit multiplication has to be inserted in front of
  // the token that was just matched.
  //
  // Returns false if the current state does not allow an implicit multiplication,
  // or if the previous/current token pair excludes one (DIGIT DIGIT, FUNC BOPEN,
  // FUNC NAME; *** FUNC cases added in 0.5).
  // Otherwise the flag is cleared, the last token is set to ' ', the matched text
  // is handed back to the scanner with yyless(0), yylval.str is set to a new
  // string " " (*** added in 1.0) and true is returned.
  bool check_implicit_mul(const int token) {
    if (!state->implicit_mul_might_follow)
      return false;

    // No implicit mul may exist between these pairs of tokens
    const bool digit_after_digit = (state->last_token == DIGIT) && (token == DIGIT);
    const bool bracket_or_name_after_func =
      (state->last_token == FUNC) && ((token == BOPEN) || (token == NAME));
    if (digit_after_digit || bracket_or_name_after_func)
      return false;

    state->implicit_mul_might_follow = false;
    state->last_token = ' ';
    yyless(0);
    yylval.str = new std::string(" ");
    return (true);
  } // check_implicit_mul()

  // Set the implicit_mul_might_follow flag of the current scanner state to how;
  // check_implicit_mul() reads this flag. *** added in 0.8
  void set_implicit_mul(const bool how) {
    state->implicit_mul_might_follow = how;
  }

  // Store how in the asterisk_statement flag of the current scanner state and
  // log the new value. *** added in 1.4.3
  void set_asterisk_statement(const bool how) {
    const char *const shown = (how ? "true" : "false");
    state->asterisk_statement = how;
    MSG_INFO(10) << "Set asterisk statement to " << shown << endline;
  }

  // Return the asterisk_statement flag of the current scanner state, as stored
  // by set_asterisk_statement(). *** added in 1.4.3
  const bool get_asterisk_statement() {
    return (state->asterisk_statement);
  }

  // Erase every space character (' ') from what, in place. Used to remove
  // space inside names. *** added in 1.0
  //
  // Other whitespace such as tabs is kept. The string is compacted in a single
  // pass; the previous version stored the result of find() in an unsigned int
  // (truncating std::string::size_type) and rescanned from the start after
  // every erase.
  void remove_space(std::string &what) {
    std::string::size_type kept = 0; // number of characters kept so far
    for (std::string::size_type i = 0; i < what.size(); ++i) {
      if (what[i] != ' ')
        what[kept++] = what[i];
    }
    what.resize(kept);
  }