自个儿动手写C语言编译器（3）

自己动手写C语言编译器（3）
词法分析器部分完成。
支持：
1.支持单词分割
2.支持数字类型
3.支持字符串
4.支持换行
5.支持注释
不支持：
1.不支持关键字
2.不支持变量。
3.不支持关键字。
4.不支持操作符。
偶没有被那些个编译原理课程所吓倒。。。。。真的勇士，只管前行！
#ifndef _ISTREAMTOKENIZER_H_
#define _ISTREAMTOKENIZER_H_

#include <limits.h>

#include <cctype>
#include <istream>
#include <string>
#include <vector>

// Number of elements in a built-in array. Kept for code that includes this
// header and relies on the macro; the class itself no longer uses it.
#define _COUNT_OF(a) (sizeof(a) / sizeof(a[0]))

/**
 * Splits a character stream into tokens: words, numbers, quoted strings,
 * single "ordinary" characters, and optionally end-of-line markers. Comments
 * are skipped. The design follows java.io.StreamTokenizer.
 *
 * Typical use:
 *
 *     IstreamTokenizer tok(stream);
 *     while (tok.nextToken() != IstreamTokenizer::TT_EOF) {
 *         // inspect tok.ttype, tok.sval, tok.nval
 *     }
 *
 * The tokenizer keeps a reference to the stream; the stream must outlive it.
 *
 * All constants carry in-class initializers so that the header can be
 * included from several translation units without duplicate definitions.
 */
class IstreamTokenizer {
private:
    /**
     * Values of peekc, the next character to be considered by nextToken().
     * NEED_CHAR means a new character must be read. SKIP_LF means a new
     * character must be read and, if it is '\n', discarded and replaced by
     * a second new character (used after a significant '\r').
     */
    static const int NEED_CHAR = INT_MAX;
    static const int SKIP_LF = INT_MAX - 1;

    // Character-class bits stored in the ctype table.
    static const unsigned char CT_WHITESPACE = 1;
    static const unsigned char CT_DIGIT = 2;
    static const unsigned char CT_ALPHA = 4;
    static const unsigned char CT_QUOTE = 8;
    static const unsigned char CT_COMMENT = 16;

    // Size of the character-class table (one entry per byte value).
    static const int CTYPE_LEN = 256;

public:
    // Token types reported through ttype / nextToken(). Any other value is
    // the character code of an ordinary character or of the quote character
    // that delimited a string token.
    static const int TT_EOF = -1;      // end of the stream
    static const int TT_EOL = '\n';    // end of line (only when significant)
    static const int TT_NUMBER = -2;   // number; value in nval
    static const int TT_WORD = -3;     // word; text in sval
    static const int TT_NOTHING = -4;  // no token has been read yet

private:
    std::istream& input;
    std::vector<char> buf;  // scratch buffer for the token being assembled
    int peekc;
    bool pushedBack;
    bool forceLower;
    int LINENO;
    bool eolIsSignificantP;
    bool slashSlashCommentsP;
    bool slashStarCommentsP;
    unsigned char ctype[CTYPE_LEN];

public:
    std::string sval;  // text of the last word or quoted-string token
    double nval;       // value of the last number token
    int ttype;         // type of the last token (see the TT_* constants)

private:
    /** Installs the default syntax: letters and bytes 160..255 form words,
     *  0..' ' is whitespace, '/' starts a line comment, '"' and '\'' quote
     *  strings, and numbers are parsed. */
    void init() {
        resetSyntax();  // the table must be zeroed before bits are OR-ed in
        wordChars('a', 'z');
        wordChars('A', 'Z');
        wordChars(128 + 32, 255);
        whitespaceChars(0, ' ');
        commentChar('/');
        quoteChar('"');
        quoteChar('\'');
        parseNumbers();
    }

    /** Clamps [low, hi] to the valid index range of the ctype table. The
     *  comparison is done on signed ints so a negative hi stays negative
     *  (yielding an empty range) instead of wrapping to a huge value. */
    static void clampRange(int& low, int& hi) {
        if (low < 0)
            low = 0;
        if (hi >= CTYPE_LEN)
            hi = CTYPE_LEN - 1;
    }

    /** True when ch is a valid index into the ctype table. */
    static bool inTable(int ch) {
        return ch >= 0 && ch < CTYPE_LEN;
    }

    /** Character-class bits for c: end of input counts as whitespace and
     *  anything beyond the table counts as a word character. */
    int charClass(int c) const {
        if (c < 0)
            return CT_WHITESPACE;
        if (c < CTYPE_LEN)
            return ctype[c];
        return CT_ALPHA;
    }

    /** Reads the next character; returns a negative value at end of input. */
    int read() {
        return input.get();
    }

    /** Consumes characters up to a line terminator or end of input and
     *  returns the character that stopped the scan. */
    int skipToLineEnd() {
        int c;
        do {
            c = read();
        } while (c != '\n' && c != '\r' && c >= 0);
        return c;
    }

public:
    /**
     * Creates a tokenizer that reads from the given stream, with the
     * default syntax installed and the line counter at 1.
     */
    IstreamTokenizer(std::istream& is)
        : input(is),
          buf(),
          peekc(NEED_CHAR),
          pushedBack(false),
          forceLower(false),
          LINENO(1),
          eolIsSignificantP(false),
          slashSlashCommentsP(false),
          slashStarCommentsP(false),
          sval(),
          nval(0),
          ttype(TT_NOTHING) {
        init();
    }

    /** Makes every character ordinary (clears the whole syntax table). */
    void resetSyntax() {
        for (int i = 0; i < CTYPE_LEN; ++i)
            ctype[i] = 0;
    }

    /** Marks all characters in [low, hi] as word constituents, in addition
     *  to whatever attributes they already have. */
    void wordChars(int low, int hi) {
        clampRange(low, hi);
        for (int i = low; i <= hi; ++i)
            ctype[i] |= CT_ALPHA;
    }

    /** Marks all characters in [low, hi] as whitespace (replacing any other
     *  attribute). */
    void whitespaceChars(int low, int hi) {
        clampRange(low, hi);
        for (int i = low; i <= hi; ++i)
            ctype[i] = CT_WHITESPACE;
    }

    /** Marks all characters in [low, hi] as ordinary. */
    void ordinaryChars(int low, int hi) {
        clampRange(low, hi);
        for (int i = low; i <= hi; ++i)
            ctype[i] = 0;
    }

    /** Marks ch as ordinary; out-of-range values are ignored. */
    void ordinaryChar(int ch) {
        if (inTable(ch))
            ctype[ch] = 0;
    }

    /** Makes ch start a single-line comment; out-of-range values are
     *  ignored. */
    void commentChar(int ch) {
        if (inTable(ch))
            ctype[ch] = CT_COMMENT;
    }

    /** Makes ch delimit quoted strings; out-of-range values are ignored. */
    void quoteChar(int ch) {
        if (inTable(ch))
            ctype[ch] = CT_QUOTE;
    }

    /** Enables number parsing: '0'..'9', '.' and '-' gain the numeric
     *  attribute in addition to what they already have. */
    void parseNumbers() {
        for (int i = '0'; i <= '9'; i++)
            ctype[i] |= CT_DIGIT;
        ctype['.'] |= CT_DIGIT;
        ctype['-'] |= CT_DIGIT;
    }

    /**
     * Determines whether ends of line are treated as tokens.
     *
     * When flag is true, nextToken() returns TT_EOL (and sets ttype to it)
     * for each end of line. A line ends with '\r' or '\n'; a '\r'
     * immediately followed by '\n' is reported as a single end of line.
     * When flag is false, end-of-line characters are treated as whitespace
     * and only separate tokens.
     */
    void eolIsSignificant(bool flag) {
        eolIsSignificantP = flag;
    }

    /** Enables or disables recognition of slash-star block comments. */
    void slashStarComments(bool flag) {
        slashStarCommentsP = flag;
    }

    /** Enables or disables recognition of double-slash line comments. */
    void slashSlashComments(bool flag) {
        slashSlashCommentsP = flag;
    }

    /** When fl is true, word tokens are converted to lower case before
     *  being stored in sval. */
    void lowerCaseMode(bool fl) {
        forceLower = fl;
    }

    /**
     * Parses the next token from the stream.
     *
     * @return the token type, which is also stored in ttype: TT_EOF,
     *         TT_EOL, TT_NUMBER (value in nval), TT_WORD (text in sval),
     *         the quote character of a quoted string (text in sval), or
     *         the character code of an ordinary character.
     */
    int nextToken() {
        if (pushedBack) {
            pushedBack = false;
            return ttype;
        }

        int c = peekc;
        if (c < 0)
            c = NEED_CHAR;
        if (c == SKIP_LF) {
            c = read();
            if (c < 0)
                return ttype = TT_EOF;
            if (c == '\n')
                c = NEED_CHAR;
        }
        if (c == NEED_CHAR) {
            c = read();
            if (c < 0)
                return ttype = TT_EOF;
        }
        ttype = c;  // just to be safe

        // The next invocation reads a fresh character unless peekc is set
        // again further down.
        peekc = NEED_CHAR;

        int cls = charClass(c);

        // Skip whitespace, counting lines on the way.
        while ((cls & CT_WHITESPACE) != 0) {
            if (c == '\r') {
                LINENO++;
                if (eolIsSignificantP) {
                    // Report the end of line; a following '\n' belongs to it.
                    peekc = SKIP_LF;
                    return ttype = TT_EOL;
                }
                c = read();
                if (c == '\n')
                    c = read();
            } else {
                if (c == '\n') {
                    LINENO++;
                    if (eolIsSignificantP)
                        return ttype = TT_EOL;
                }
                c = read();
            }
            if (c < 0)
                return ttype = TT_EOF;
            cls = charClass(c);
        }

        // Number: optional '-', digits, at most one '.'.
        if ((cls & CT_DIGIT) != 0) {
            bool neg = false;
            if (c == '-') {
                c = read();
                if (c != '.' && (c < '0' || c > '9')) {
                    // A lone minus sign is an ordinary character.
                    peekc = c;
                    return ttype = '-';
                }
                neg = true;
            }
            double v = 0;
            int decexp = 0;   // number of digits seen after the '.'
            int seendot = 0;
            for (;;) {
                if (c == '.' && seendot == 0) {
                    seendot = 1;
                } else if ('0' <= c && c <= '9') {
                    v = v * 10 + (c - '0');
                    decexp += seendot;
                } else {
                    break;
                }
                c = read();
            }
            peekc = c;
            if (decexp != 0) {
                // One division by the accumulated power of ten, which is
                // likely more accurate than scaling digit by digit.
                double denom = 1;
                for (int k = 0; k < decexp; ++k)
                    denom *= 10;
                v = v / denom;
            }
            nval = neg ? -v : v;
            return ttype = TT_NUMBER;
        }

        // Word: a word character followed by word or numeric characters.
        if ((cls & CT_ALPHA) != 0) {
            buf.clear();
            do {
                buf.push_back(static_cast<char>(c));
                c = read();
                cls = charClass(c);
            } while ((cls & (CT_ALPHA | CT_DIGIT)) != 0);
            peekc = c;
            sval.assign(buf.begin(), buf.end());
            if (forceLower) {
                for (std::string::size_type k = 0; k < sval.size(); ++k) {
                    sval[k] = static_cast<char>(
                        std::tolower(static_cast<unsigned char>(sval[k])));
                }
            }
            return ttype = TT_WORD;
        }

        // Quoted string: runs to the matching quote, a line end, or EOF.
        if ((cls & CT_QUOTE) != 0) {
            ttype = c;
            buf.clear();
            int d = read();
            while (d >= 0 && d != ttype && d != '\n' && d != '\r') {
                if (d == '\\') {
                    c = read();
                    if (c < 0) {
                        // Backslash directly before end of input: there is
                        // nothing to escape, so stop here.
                        d = c;
                        break;
                    }
                    const int first = c;  // to allow \377, but not \477
                    if (c >= '0' && c <= '7') {
                        c = c - '0';
                        int c2 = read();
                        if ('0' <= c2 && c2 <= '7') {
                            c = (c << 3) + (c2 - '0');
                            c2 = read();
                            if ('0' <= c2 && c2 <= '7' && first <= '3') {
                                c = (c << 3) + (c2 - '0');
                                d = read();
                            } else {
                                d = c2;
                            }
                        } else {
                            d = c2;
                        }
                    } else {
                        switch (c) {
                        case 'a': c = 0x7; break;
                        case 'b': c = '\b'; break;
                        case 'f': c = 0xC; break;
                        case 'n': c = '\n'; break;
                        case 'r': c = '\r'; break;
                        case 't': c = '\t'; break;
                        case 'v': c = 0xB; break;
                        default: break;  // any other character stands for itself
                        }
                        d = read();
                    }
                } else {
                    c = d;
                    d = read();
                }
                buf.push_back(static_cast<char>(c));
            }

            // If the loop ended on the matching quote, read a new character
            // next time around; otherwise keep the terminating character.
            if (d == ttype)
                peekc = NEED_CHAR;
            else
                peekc = d;
            sval.assign(buf.begin(), buf.end());
            return ttype;
        }

        // Slash-star and double-slash comments, when enabled.
        if (c == '/' && (slashSlashCommentsP || slashStarCommentsP)) {
            c = read();
            if (c == '*' && slashStarCommentsP) {
                int prevc = 0;
                while ((c = read()) != '/' || prevc != '*') {
                    if (c == '\r') {
                        LINENO++;
                        c = read();
                        if (c == '\n')
                            c = read();
                    } else if (c == '\n') {
                        LINENO++;
                        c = read();
                    }
                    if (c < 0)
                        return ttype = TT_EOF;
                    prevc = c;
                }
                return nextToken();
            }
            if (c == '/' && slashSlashCommentsP) {
                peekc = skipToLineEnd();
                return nextToken();
            }
            // Not a two-character comment; '/' may still be a plain
            // single-line comment character.
            if ((ctype['/'] & CT_COMMENT) != 0) {
                peekc = skipToLineEnd();
                return nextToken();
            }
            peekc = c;
            return ttype = '/';
        }

        // Single-character line comment.
        if ((cls & CT_COMMENT) != 0) {
            peekc = skipToLineEnd();
            return nextToken();
        }

        // Ordinary character: it is its own token type.
        return ttype = c;
    }

    /** Makes the next call to nextToken() return the current token again
     *  without touching sval or nval. Has no effect before the first token
     *  has been read. */
    void pushBack() {
        if (ttype != TT_NOTHING)
            pushedBack = true;
    }

    /** Returns the current line number (counting starts at 1). */
    int lineno() {
        return LINENO;
    }

    // Declared only; no definition is provided in this header.
    std::string toString();
};

#endif
? 1 楼 vb2005xu 2011-07-29 希望再接再厉我以前也想弄这个玩意 2 楼 jcs130 2011-07-29 额……真厉害……我以后也试试…… 3 楼 rundout 2011-07-30 牛人，希望你勇往直前
自个儿动手写C语言编译器（3）

热点推荐