Lua 5.1.4: llex.c


L0001    /*
L0002    ** $Id: llex.c,v 2.20.1.1 2007/12/27 13:02:25 roberto Exp $
L0003    ** Lexical Analyzer
L0004    ** See Copyright Notice in lua.h
L0005    */
L0006    
L0007    
L0008    #include <ctype.h>
L0009    #include <locale.h>
L0010    #include <string.h>
L0011    
L0012    #define llex_c
L0013    #define LUA_CORE
L0014    
L0015    #include "lua.h"
L0016    
L0017    #include "ldo.h"
L0018    #include "llex.h"
L0019    #include "lobject.h"
L0020    #include "lparser.h"
L0021    #include "lstate.h"
L0022    #include "lstring.h"
L0023    #include "ltable.h"
L0024    #include "lzio.h"
L0025    
L0026    
L0027    
/* store the result of zgetc() on the lexer's input stream in `current' */
#define next(ls) ((ls)->current = zgetc((ls)->z))
L0029    
L0030    
L0031    
L0032    
/* true when the current character is a line feed or a carriage return */
#define currIsNewline(ls) \
	((ls)->current == '\n' || (ls)->current == '\r')
L0034    
L0035    
/*
** Printable text of every multi-character token, indexed by
** (token - FIRST_RESERVED); the list ends with a NULL sentinel.
*/
/* ORDER RESERVED */
const char *const luaX_tokens [] = {
    /* reserved words */
    "and", "break", "do",
    "else", "elseif", "end",
    "false", "for", "function",
    "if", "in", "local",
    "nil", "not", "or",
    "repeat", "return", "then",
    "true", "until", "while",
    /* other terminal symbols */
    "..", "...",
    "==", ">=", "<=", "~=",
    "<number>", "<name>", "<string>", "<eof>",
    NULL
};
L0046    
L0047    
/* append the current character to the token buffer, then read the next one */
#define save_and_next(ls) (save((ls), (ls)->current), next(ls))
L0049    
L0050    
/*
** Appends character `c' to the token buffer of `ls'. When the buffer has
** no room left, its size is doubled first; if the current size is already
** at least MAX_SIZET/2 a "lexical element too long" error is raised.
*/
static void save (LexState *ls, int c) {
  Mbuffer *buf = ls->buff;
  if (buf->n + 1 > buf->buffsize) {  /* no room for one more character? */
    size_t newsize;
    if (buf->buffsize >= MAX_SIZET/2)  /* doubling would not fit in size_t */
      luaX_lexerror(ls, "lexical element too long", 0);
    newsize = 2 * buf->buffsize;
    luaZ_resizebuffer(ls->L, buf, newsize);
  }
  buf->buffer[buf->n] = cast(char, c);
  buf->n++;
}
L0062    
L0063    
/*
** Creates one string per reserved word (the first NUM_RESERVED entries of
** luaX_tokens), fixes it, and tags it with its 1-based index so that the
** lexer can recognize reserved words by looking at `tsv.reserved'.
*/
void luaX_init (lua_State *L) {
  int idx = 0;
  while (idx < NUM_RESERVED) {
    const char *word = luaX_tokens[idx];
    TString *ts = luaS_new(L, word);
    luaS_fix(ts);  /* reserved words are never collected */
    lua_assert(strlen(word)+1 <= TOKEN_LEN);
    ts->tsv.reserved = cast_byte(idx+1);  /* 0 is kept for ordinary names */
    idx++;
  }
}
L0073    
L0074    
/* size of the buffer that holds the chunk id shown in error messages */
#define MAXSRC	80
L0076    
L0077    
/*
** Returns a printable representation of `token'. Tokens at or above
** FIRST_RESERVED are looked up in luaX_tokens; anything below is a
** single character, formatted as "char(N)" when it is a control character
** and as the character itself otherwise (the formatted string is built
** with luaO_pushfstring).
*/
const char *luaX_token2str (LexState *ls, int token) {
  if (token >= FIRST_RESERVED)
    return luaX_tokens[token-FIRST_RESERVED];
  /* single-character token */
  lua_assert(token == cast(unsigned char, token));
  if (iscntrl(token))
    return luaO_pushfstring(ls->L, "char(%d)", token);
  return luaO_pushfstring(ls->L, "%c", token);
}
L0087    
L0088    
/*
** Text to show for `token' in an error message. Names, strings and
** numbers are reported with the raw text currently held in the token
** buffer (which is zero-terminated here); every other token goes through
** luaX_token2str.
*/
static const char *txtToken (LexState *ls, int token) {
  if (token == TK_NAME || token == TK_STRING || token == TK_NUMBER) {
    save(ls, '\0');  /* make the buffer a valid C string */
    return luaZ_buffer(ls->buff);
  }
  return luaX_token2str(ls, token);
}
L0100    
L0101    
/*
** Builds the message "<chunkid>:<line>: <msg>", appends " near <token>"
** when `token' is non-zero, and then calls luaD_throw with
** LUA_ERRSYNTAX.
*/
void luaX_lexerror (LexState *ls, const char *msg, int token) {
  lua_State *L = ls->L;
  char chunkid[MAXSRC];
  luaO_chunkid(chunkid, getstr(ls->source), MAXSRC);
  msg = luaO_pushfstring(L, "%s:%d: %s", chunkid, ls->linenumber, msg);
  if (token != 0)  /* is there a token to report? */
    luaO_pushfstring(L, "%s near " LUA_QS, msg, txtToken(ls, token));
  luaD_throw(L, LUA_ERRSYNTAX);
}
L0110    
L0111    
/*
** Raises a lexical error with message `msg', reported near the current
** token of `ls'.
*/
void luaX_syntaxerror (LexState *ls, const char *msg) {
  int current_token = ls->t.token;
  luaX_lexerror(ls, msg, current_token);
}
L0115    
L0116    
L0117    TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
L0118      lua_State *L = ls->L;
L0119      TString *ts = luaS_newlstr(L, str, l);
L0120      TValue *o = luaH_setstr(L, ls->fs->h, ts);  /* entry for `str' */
L0121      if (ttisnil(o))
L0122        setbvalue(o, 1);  /* make sure `str' will not be collected */
L0123      return ts;
L0124    }
L0125    
L0126    
/*
** Consumes one line break and bumps the line counter. A line break is a
** single `\n' or `\r', or one of them immediately followed by the other
** (`\r\n' or `\n\r'). Raises a syntax error when the counter reaches
** MAX_INT.
*/
static void inclinenumber (LexState *ls) {
  int first = ls->current;
  lua_assert(currIsNewline(ls));
  next(ls);  /* skip `\n' or `\r' */
  if (ls->current != first && currIsNewline(ls))
    next(ls);  /* skip the second half of `\n\r' or `\r\n' */
  ls->linenumber++;
  if (ls->linenumber >= MAX_INT)
    luaX_syntaxerror(ls, "chunk has too many lines");
}
L0136    
L0137    
/*
** Prepares `ls' to scan the stream `z' for chunk `source': resets every
** lexer field, sizes the token buffer to LUA_MINBUFFER and reads the
** first character.
*/
void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) {
  /* attach the lexer to its state, input stream and chunk name */
  ls->L = L;
  ls->z = z;
  ls->source = source;
  ls->fs = NULL;
  /* initial scanning state */
  ls->decpoint = '.';
  ls->linenumber = 1;
  ls->lastline = 1;
  ls->lookahead.token = TK_EOS;  /* no look-ahead token */
  luaZ_resizebuffer(L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
  next(ls);  /* read first char */
}
L0150    
L0151    
L0152    
L0153    /*
L0154    ** =======================================================
L0155    ** LEXICAL ANALYZER
L0156    ** =======================================================
L0157    */
L0158    
L0159    
L0160    
/*
** If the current character belongs to `set' (a zero-terminated list of
** candidate characters), saves it in the token buffer, advances the
** input and returns 1. Otherwise leaves the lexer untouched and
** returns 0.
**
** A NUL byte in the input must be rejected explicitly: strchr treats the
** terminating '\0' of `set' as part of the string, so strchr(set, 0)
** never fails and an embedded zero would match every set.
*/
static int check_next (LexState *ls, const char *set) {
  if (ls->current == '\0' || !strchr(set, ls->current))
    return 0;
  save_and_next(ls);
  return 1;
}
L0167    
L0168    
/*
** Replaces every occurrence of character `from' in the token buffer
** with `to'.
*/
static void buffreplace (LexState *ls, char from, char to) {
  char *text = luaZ_buffer(ls->buff);
  size_t i = luaZ_bufflen(ls->buff);
  for (; i > 0; i--) {  /* walk the buffer from its last character */
    if (text[i-1] == from)
      text[i-1] = to;
  }
}
L0175    
L0176    
/*
** Called after a numeral failed to convert: refreshes the lexer's
** decimal point from the current C locale, rewrites the buffer with it
** and retries the conversion. If it still fails, the buffer is restored
** to use '.' (for the error message) and "malformed number" is raised.
*/
static void trydecpoint (LexState *ls, SemInfo *seminfo) {
  struct lconv *cv = localeconv();
  char previous = ls->decpoint;
  if (cv)
    ls->decpoint = cv->decimal_point[0];
  else
    ls->decpoint = '.';
  buffreplace(ls, previous, ls->decpoint);  /* try updated decimal separator */
  if (luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r))
    return;  /* conversion succeeded with the locale's separator */
  /* format error with correct decimal point: no more options */
  buffreplace(ls, ls->decpoint, '.');  /* undo change (for error message) */
  luaX_lexerror(ls, "malformed number", TK_NUMBER);
}
L0189    
L0190    
/* LUA_NUMBER */
/*
** Reads a numeral into the token buffer and converts it, storing the
** value in `seminfo->r'. The scan is deliberately permissive (digits and
** dots, an optional exponent mark with optional sign, then any run of
** alphanumerics or underscores); the conversion routine decides whether
** the collected text is a valid number.
*/
static void read_numeral (LexState *ls, SemInfo *seminfo) {
  lua_assert(isdigit(ls->current));
  save_and_next(ls);  /* first digit */
  while (isdigit(ls->current) || ls->current == '.')
    save_and_next(ls);
  if (check_next(ls, "Ee"))  /* exponent mark? */
    check_next(ls, "+-");  /* optional exponent sign */
  for (;;) {  /* rest of the exponent, hexadecimal digits, trailing junk */
    if (!(isalnum(ls->current) || ls->current == '_'))
      break;
    save_and_next(ls);
  }
  save(ls, '\0');
  buffreplace(ls, '.', ls->decpoint);  /* follow locale for decimal point */
  if (luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r))
    return;  /* converted fine */
  trydecpoint(ls, seminfo);  /* format error: try to update decimal point */
}
L0206    
L0207    
/*
** Reads a candidate long-bracket delimiter: the current `[' or `]'
** followed by any number of `='. All characters read are saved in the
** token buffer. Returns the number of `=' when the sequence is followed
** by a second bracket of the same kind, and -(number of `=') - 1
** otherwise; the second bracket itself is not consumed.
*/
static int skip_sep (LexState *ls) {
  int level = 0;
  int bracket = ls->current;
  lua_assert(bracket == '[' || bracket == ']');
  save_and_next(ls);
  for (; ls->current == '='; level++)
    save_and_next(ls);
  if (ls->current == bracket)
    return level;
  return (-level) - 1;
}
L0219    
L0220    
/*
** Reads the body of a long string or long comment whose opening
** delimiter has `sep' equal signs; on entry the current character is the
** second `[' of that delimiter. When `seminfo' is NULL the text is a
** comment: nothing is kept and the buffer is reset at each line break.
** Otherwise the contents, without the delimiters, are stored as a
** string in `seminfo->ts'. A line break right after the opening
** delimiter is skipped, and every line break is saved as `\n'.
*/
static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
  int depth = 0;  /* nesting depth, used only by the compatibility code */
  int finished = 0;
  (void)(depth);  /* avoid warnings when `depth' is not used */
  save_and_next(ls);  /* skip 2nd `[' */
  if (currIsNewline(ls))  /* string starts with a newline? */
    inclinenumber(ls);  /* skip it */
  while (!finished) {
    switch (ls->current) {
      case EOZ: {
        const char *what;
        if (seminfo)
          what = "unfinished long string";
        else
          what = "unfinished long comment";
        luaX_lexerror(ls, what, TK_EOS);
        break;  /* to avoid warnings */
      }
#if defined(LUA_COMPAT_LSTR)
      case '[': {
        if (skip_sep(ls) == sep) {
          save_and_next(ls);  /* skip 2nd `[' */
          depth++;
#if LUA_COMPAT_LSTR == 1
          if (sep == 0)
            luaX_lexerror(ls, "nesting of [[...]] is deprecated", '[');
#endif
        }
        break;
      }
#endif
      case ']': {
        if (skip_sep(ls) == sep) {
          save_and_next(ls);  /* skip 2nd `]' */
#if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2
          depth--;
          if (sep == 0 && depth >= 0) break;  /* closed a nested level only */
#endif
          finished = 1;  /* matching closing delimiter: leave the loop */
        }
        break;
      }
      case '\n':
      case '\r': {
        save(ls, '\n');
        inclinenumber(ls);
        if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
        break;
      }
      default: {
        if (seminfo) save(ls, ls->current);  /* keep text only for strings */
        next(ls);
        break;
      }
    }
  }
  if (seminfo) {
    int delim = 2 + sep;  /* length of each delimiter: two brackets + `='s */
    seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + delim,
                                     luaZ_bufflen(ls->buff) - 2*delim);
  }
}
L0274    
L0275    
/*
** Reads a quoted string delimited by `del' (the current character on
** entry) and stores its contents, without the delimiters, in
** `seminfo->ts'. Backslash escapes are translated while reading. An
** unescaped line break or the end of the stream before the closing
** delimiter raises "unfinished string".
*/
static void read_string (LexState *ls, int del, SemInfo *seminfo) {
  save_and_next(ls);  /* keep opening delimiter (for error messages) */
  while (ls->current != del) {
    int ch = ls->current;
    if (ch == EOZ)
      luaX_lexerror(ls, "unfinished string", TK_EOS);
    else if (ch == '\n' || ch == '\r')
      luaX_lexerror(ls, "unfinished string", TK_STRING);
    else if (ch != '\\')
      save_and_next(ls);  /* ordinary character */
    else {  /* escape sequence */
      int c;
      next(ls);  /* do not save the `\' */
      switch (ls->current) {
        case 'a': c = '\a'; break;
        case 'b': c = '\b'; break;
        case 'f': c = '\f'; break;
        case 'n': c = '\n'; break;
        case 'r': c = '\r'; break;
        case 't': c = '\t'; break;
        case 'v': c = '\v'; break;
        case '\n':  /* go through */
        case '\r': {  /* escaped line break becomes a `\n' */
          save(ls, '\n');
          inclinenumber(ls);
          continue;
        }
        case EOZ: continue;  /* will raise an error next loop */
        default: {
          if (isdigit(ls->current)) {  /* \xxx */
            int ndigits = 0;
            c = 0;
            do {  /* read up to three decimal digits */
              c = 10*c + (ls->current-'0');
              next(ls);
              ndigits++;
            } while (ndigits < 3 && isdigit(ls->current));
            if (c > UCHAR_MAX)
              luaX_lexerror(ls, "escape sequence too large", TK_STRING);
            save(ls, c);
          }
          else
            save_and_next(ls);  /* handles \\, \", \', and \? */
          continue;
        }
      }
      /* single-letter escape: store the translated character */
      save(ls, c);
      next(ls);
    }
  }
  save_and_next(ls);  /* skip delimiter */
  seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
                                   luaZ_bufflen(ls->buff) - 2);
}
L0330    
L0331    
/*
** Scans and returns the next token of the input. Blanks, line breaks and
** comments are skipped. Single-character tokens are returned as their
** own character code; for TK_NAME, TK_STRING and TK_NUMBER the semantic
** value is stored in `seminfo'. Returns TK_EOS at the end of the stream.
*/
static int llex (LexState *ls, SemInfo *seminfo) {
  luaZ_resetbuffer(ls->buff);
  for (;;) {
    switch (ls->current) {
      case '\n':
      case '\r': {
        inclinenumber(ls);
        continue;
      }
      case '-': {
        next(ls);
        if (ls->current != '-') return '-';
        /* else is a comment */
        next(ls);
        if (ls->current == '[') {
          int sep = skip_sep(ls);
          luaZ_resetbuffer(ls->buff);  /* `skip_sep' may dirty the buffer */
          if (sep >= 0) {
            read_long_string(ls, NULL, sep);  /* long comment */
            luaZ_resetbuffer(ls->buff);
            continue;
          }
        }
        /* else short comment: discard everything up to the end of line */
        while (!(currIsNewline(ls) || ls->current == EOZ))
          next(ls);
        continue;
      }
      case '[': {
        int sep = skip_sep(ls);
        if (sep == -1) return '[';  /* plain bracket, no `=' after it */
        else if (sep >= 0) {
          read_long_string(ls, seminfo, sep);
          return TK_STRING;
        }
        /* `[' followed by `='s but no second `[' */
        else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING);
      }
      case '=': {
        next(ls);
        if (ls->current == '=') { next(ls); return TK_EQ; }
        return '=';
      }
      case '<': {
        next(ls);
        if (ls->current == '=') { next(ls); return TK_LE; }
        return '<';
      }
      case '>': {
        next(ls);
        if (ls->current == '=') { next(ls); return TK_GE; }
        return '>';
      }
      case '~': {
        next(ls);
        if (ls->current == '=') { next(ls); return TK_NE; }
        return '~';
      }
      case '"':
      case '\'': {
        read_string(ls, ls->current, seminfo);
        return TK_STRING;
      }
      case '.': {
        save_and_next(ls);
        if (check_next(ls, "."))  /* `..' or `...' */
          return check_next(ls, ".") ? TK_DOTS : TK_CONCAT;
        if (isdigit(ls->current)) {  /* numeral starting with a dot */
          read_numeral(ls, seminfo);
          return TK_NUMBER;
        }
        return '.';
      }
      case EOZ: {
        return TK_EOS;
      }
      default: {
        int c = ls->current;
        if (isspace(c)) {
          lua_assert(!currIsNewline(ls));
          next(ls);
          continue;
        }
        if (isdigit(c)) {
          read_numeral(ls, seminfo);
          return TK_NUMBER;
        }
        if (isalpha(c) || c == '_') {
          /* identifier or reserved word */
          TString *ts;
          do {
            save_and_next(ls);
          } while (isalnum(ls->current) || ls->current == '_');
          ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
                                  luaZ_bufflen(ls->buff));
          if (ts->tsv.reserved > 0)  /* reserved word? */
            return ts->tsv.reserved - 1 + FIRST_RESERVED;
          seminfo->ts = ts;
          return TK_NAME;
        }
        /* single-char tokens (+ - / ...) */
        next(ls);
        return c;
      }
    }
  }
}
L0444    
L0445    
/*
** Advances to the next token: records the current line in `lastline',
** then either promotes the pending look-ahead token to current token or,
** when there is none, scans a new token from the input.
*/
void luaX_next (LexState *ls) {
  ls->lastline = ls->linenumber;
  if (ls->lookahead.token == TK_EOS) {  /* no look-ahead token? */
    ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
    return;
  }
  ls->t = ls->lookahead;  /* use the look-ahead token */
  ls->lookahead.token = TK_EOS;  /* and discharge it */
}
L0455    
L0456    
/*
** Scans one token ahead and stores it in `ls->lookahead'. There must be
** no pending look-ahead token when this is called.
*/
void luaX_lookahead (LexState *ls) {
  int tok;
  lua_assert(ls->lookahead.token == TK_EOS);
  tok = llex(ls, &ls->lookahead.seminfo);
  ls->lookahead.token = tok;
}
L0461    

Generated by pretty.lua