编译原理LL1语法分析算法的兑现（token转四元式）

编译原理LL1语法分析算法的实现（token转四元式）

#include <stdio.h>

#include <stack>

#include <vector>

#include <iostream>

#include <fstream>

#include <malloc.h>

using namespace std;

//token结构

struct token {

?? ?int code; //token的类别,code为1则是符号，为2则是数字

?? ?char value; //token的值

};

typedef struct token tokens;

vector<tokens> tokenBuffer; //用于存储token的缓冲区

stack<tokens> tokenBuffers;

//产生式结构

struct formula {

?? ?int id; //产生式编号

?? ?char left; //产生式左部

?? ?char right[256]; //产生式右部

?? ?int r_length; //产生式右部长度

};

typedef struct formula formulas;

formulas formulaBuffer[11]; //用于存储产生式的缓冲区

//四元式结构

struct expression {

?? ?char symbol; //操作符

?? ?char num1; //第一个操作数

?? ?char num2; //第二个操作数

?? ?char result; //结果变量

};

typedef struct expression expressions;

vector<expressions> expressionBuffer; //用于存储四元式的缓冲区

int expressionCount = 0; //四元式个数

//分析表中每一项的结构

struct analysisTable {

?? ?char currentState; //分析栈的栈顶符号

?? ?char currentToken; //当前字符

?? ?int expressionNum; //对应的产生式编号

};

typedef struct analysisTable analysisTables;

vector<analysisTables> analysisTableBuffer; //LL1分析表的缓冲区

stack<char> analysisStack; //分析栈

stack<char> sematicStack; //语义栈

//初始化LL1分析表

void initialAnalysisTableBuffer() {

?? ?analysisTables* temp1a = new analysisTable;

?? ?analysisTables* temp1b = new analysisTable;

?? ?analysisTables* temp1c = new analysisTable;

?? ?analysisTables* temp2 = new analysisTable;

?? ?analysisTables* temp3 = new analysisTable;

?? ?analysisTables* temp4 = new analysisTable;

?? ?analysisTables* temp5 = new analysisTable;

?? ?analysisTables* temp6 = new analysisTable;

?? ?analysisTables* temp7a = new analysisTable;

?? ?analysisTables* temp7b = new analysisTable;

?? ?analysisTables* temp7c = new analysisTable;

?? ?analysisTables* temp8 = new analysisTable;

?? ?analysisTables* temp9 = new analysisTable;

?? ?analysisTables* temp10 = new analysisTable;

?? ?analysisTables* temp11 = new analysisTable;

?? ?analysisTables* temp12 = new analysisTable;

?? ?analysisTables* temp13 = new analysisTable;

?? ?analysisTables* temp14 = new analysisTable;

?? ?analysisTables* temp15a = new analysisTable;

?? ?analysisTables* temp15b = new analysisTable;

?? ?analysisTables* temp15c = new analysisTable;

?? ?analysisTables* temp16 = new analysisTable;

?? ?temp1a->expressionNum = 1;

?? ?temp1a->currentState = 'E';

?? ?temp1a->currentToken = 'a';

?? ?temp1b->expressionNum = 1;

?? ?temp1b->currentState = 'E';

?? ?temp1b->currentToken = 'b';

?? ?temp1c->expressionNum = 1;

?? ?temp1c->currentState = 'E';

?? ?temp1c->currentToken = 'c';

?? ?temp2->expressionNum = 1;

?? ?temp2->currentState = 'E';

?? ?temp2->currentToken = '(';

?? ?temp3->expressionNum = 2;

?? ?temp3->currentState = 'L';

?? ?temp3->currentToken = '+';

?? ?temp4->expressionNum = 3;

?? ?temp4->currentState = 'L';

?? ?temp4->currentToken = '-';

?? ?temp5->expressionNum = 4;

?? ?temp5->currentState = 'L';

?? ?temp5->currentToken = ')';

?? ?temp6->expressionNum = 4;

?? ?temp6->currentState = 'L';

?? ?temp6->currentToken = '#';

?? ?temp7a->expressionNum = 5;

?? ?temp7a->currentState = 'T';

?? ?temp7a->currentToken = 'a';

?? ?temp7b->expressionNum = 5;

?? ?temp7b->currentState = 'T';

?? ?temp7b->currentToken = 'b';

?? ?temp7c->expressionNum = 5;

?? ?temp7c->currentState = 'T';

?? ?temp7c->currentToken = 'c';

?? ?temp8->expressionNum = 5;

?? ?temp8->currentState = 'T';

?? ?temp8->currentToken = '(';

?? ?temp9->expressionNum = 8;

?? ?temp9->currentState = 'M';

?? ?temp9->currentToken = '+';

?? ?temp10->expressionNum = 8;

?? ?temp10->currentState = 'M';

?? ?temp10->currentToken = '-';

?? ?temp11->expressionNum = 6;

?? ?temp11->currentState = 'M';

?? ?temp11->currentToken = '*';

?? ?temp12->expressionNum = 7;

?? ?temp12->currentState = 'M';

?? ?temp12->currentToken = '/';

?? ?temp13->expressionNum = 8;

?? ?temp13->currentState = 'M';

?? ?temp13->currentToken = ')';

?? ?temp14->expressionNum = 8;

?? ?temp14->currentState = 'M';

?? ?temp14->currentToken = '#';

?? ?temp15a->expressionNum = 9;

?? ?temp15a->currentState = 'F';

?? ?temp15a->currentToken = 'a';

?? ?temp15b->expressionNum = 9;

?? ?temp15b->currentState = 'F';

?? ?temp15b->currentToken = 'b';

?? ?temp15c->expressionNum = 9;

?? ?temp15c->currentState = 'F';

?? ?temp15c->currentToken = 'c';

?? ?temp16->expressionNum = 10;

?? ?temp16->currentState = 'F';

?? ?temp16->currentToken = '(';

?? ?analysisTableBuffer.push_back(*temp1a);

?? ?analysisTableBuffer.push_back(*temp1b);

?? ?analysisTableBuffer.push_back(*temp1c);

?? ?analysisTableBuffer.push_back(*temp2);

?? ?analysisTableBuffer.push_back(*temp3);

?? ?analysisTableBuffer.push_back(*temp4);

?? ?analysisTableBuffer.push_back(*temp5);

?? ?analysisTableBuffer.push_back(*temp6);

?? ?analysisTableBuffer.push_back(*temp7a);

?? ?analysisTableBuffer.push_back(*temp7b);

?? ?analysisTableBuffer.push_back(*temp7c);

?? ?analysisTableBuffer.push_back(*temp8);

?? ?analysisTableBuffer.push_back(*temp9);

?? ?analysisTableBuffer.push_back(*temp10);

?? ?analysisTableBuffer.push_back(*temp11);

?? ?analysisTableBuffer.push_back(*temp12);

?? ?analysisTableBuffer.push_back(*temp13);

?? ?analysisTableBuffer.push_back(*temp14);

?? ?analysisTableBuffer.push_back(*temp15a);

?? ?analysisTableBuffer.push_back(*temp15b);

?? ?analysisTableBuffer.push_back(*temp15c);

?? ?analysisTableBuffer.push_back(*temp16);

}

//由于本次实验主要是考察语法分析和语义分析，所以为了节省时间不采用查表的方式获取token，而是直接初始化token的值。

//初始化token缓冲区

void initialTokenBuffer() {

?? ?ifstream fin("tokens.txt");

?? ?char temp[10][5];

?? ?int i = 0;

?? ?while (!fin.eof()) {

?? ? ? ?fin.getline(temp[i], 4);

?? ? ? ?i++;

?? ?}

?? ?fin.close();

?? ?do {

?? ? ? ?i--;

?? ? ? ?tokens *token_temp = new tokens();

?? ? ? ?token_temp->code = atoi(&temp[i][0]);

?? ? ? ?token_temp->value = temp[i][2];

?? ? ? ?tokenBuffer.push_back(*token_temp);

?? ? ? ?tokenBuffers.push(*token_temp);

?? ?} while (i != 0);

?? ?// ? ?tokens* t1 = new tokens();

?? ?// ? ?tokens* t2 = new tokens();

?? ?// ? ?tokens* t3 = new tokens();

?? ?// ? ?tokens* t4 = new tokens();

?? ?// ? ?tokens* t5 = new tokens();

?? ?// ? ?tokens* t6 = new tokens();

?? ?// ? ?t1->code = 2;

?? ?// ? ?t1->value = 'a';

?? ?// ? ?t2->code = 1;

?? ?// ? ?t2->value = '+';

?? ?// ? ?t3->code = 2;

?? ?// ? ?t3->value = 'b';

?? ?// ? ?t4->code = 1;

?? ?// ? ?t4->value = '*';

?? ?// ? ?t5->code = 2;

?? ?// ? ?t5->value = 'c';

?? ?// ? ?t6->code = 1;

?? ?// ? ?t6->value = '#';

?? ?// ? ?tokenBuffer.push_back(*t1);

?? ?// ? ?tokenBuffer.push_back(*t2);

?? ?// ? ?tokenBuffer.push_back(*t3);

?? ?// ? ?tokenBuffer.push_back(*t4);

?? ?// ? ?tokenBuffer.push_back(*t5);

?? ?// ? ?tokenBuffer.push_back(*t6);

?? ?//

?? ?// ? ?tokenBuffers.push(*t6);

?? ?// ? ?tokenBuffers.push(*t5);

?? ?// ? ?tokenBuffers.push(*t4);

?? ?// ? ?tokenBuffers.push(*t3);

?? ?// ? ?tokenBuffers.push(*t2);

?? ?// ? ?tokenBuffers.push(*t1);

}

//初始化产生式缓冲区

void initialFormulaBuffer() {

?? ?formulas *fTemp1 = new formula;

?? ?formulas *fTemp2 = new formula;

?? ?formulas *fTemp3 = new formula;

?? ?formulas *fTemp4 = new formula;

?? ?formulas *fTemp5 = new formula;

?? ?formulas *fTemp6 = new formula;

?? ?formulas *fTemp7 = new formula;

?? ?formulas *fTemp8 = new formula;

?? ?formulas *fTemp9 = new formula;

?? ?formulas *fTemp10 = new formula;

?? ?fTemp1->id = 1;

?? ?fTemp1->left = 'E';

?? ?fTemp1->r_length = 2;

?? ?fTemp1->right[0] = 'T';

?? ?fTemp1->right[1] = 'L';

?? ?fTemp1->right[2] = '\0';

?? ?fTemp2->id = 2;

?? ?fTemp2->left = 'L';

?? ?fTemp2->r_length = 4;

?? ?fTemp2->right[0] = '+';

?? ?fTemp2->right[1] = 'T';

?? ?fTemp2->right[2] = 'Y';

?? ?fTemp2->right[3] = 'L';

?? ?fTemp2->right[4] = '\0';

?? ?fTemp3->id = 3;

?? ?fTemp3->left = 'L';

?? ?fTemp3->r_length = 4;

?? ?fTemp3->right[0] = '-';

?? ?fTemp3->right[1] = 'T';

?? ?fTemp3->right[2] = 'U';

?? ?fTemp3->right[3] = 'L';

?? ?fTemp3->right[4] = '\0';

?? ?fTemp4->id = 4;

?? ?fTemp4->left = 'L';

?? ?fTemp4->r_length = 0;

?? ?//fTemp->right[0] = "N";

?? ?fTemp5->id = 5;

?? ?fTemp5->left = 'T';

?? ?fTemp5->r_length = 2;

?? ?fTemp5->right[0] = 'F';

?? ?fTemp5->right[1] = 'M';

?? ?fTemp6->id = 6;

?? ?fTemp6->left = 'M';

?? ?fTemp6->r_length = 4;

?? ?fTemp6->right[0] = '*';

?? ?fTemp6->right[1] = 'F';

?? ?fTemp6->right[2] = 'I';

?? ?fTemp6->right[3] = 'M';

?? ?fTemp7->id = 7;

?? ?fTemp7->left = 'M';

?? ?fTemp7->r_length = 4;

?? ?fTemp7->right[0] = '/';

?? ?fTemp7->right[1] = 'F';

?? ?fTemp7->right[2] = 'O';

?? ?fTemp7->right[3] = 'M';

?? ?fTemp8->id = 8;

?? ?fTemp8->left = 'M';

?? ?fTemp8->r_length = 0;

?? ?fTemp9->id = 9;

?? ?fTemp9->left = 'F';

?? ?fTemp9->r_length = 2;

?? ?fTemp9->right[0] = 'i';

?? ?fTemp9->right[1] = 'P';

?? ?fTemp10->id = 10;

?? ?fTemp10->left = 'F';

?? ?fTemp10->r_length = 3;

?? ?fTemp10->right[0] = '(';

?? ?fTemp10->right[1] = 'E';

?? ?fTemp10->right[2] = ')';

?? ?formulaBuffer[0] = *fTemp1;

?? ?formulaBuffer[1] = *fTemp2;

?? ?formulaBuffer[2] = *fTemp3;

?? ?formulaBuffer[3] = *fTemp4;

?? ?formulaBuffer[4] = *fTemp5;

?? ?formulaBuffer[5] = *fTemp6;

?? ?formulaBuffer[6] = *fTemp7;

?? ?formulaBuffer[7] = *fTemp8;

?? ?formulaBuffer[8] = *fTemp9;

?? ?formulaBuffer[9] = *fTemp10;

}

//入语义栈操作

void accept(tokens temp) {

?? ?if (temp.code == 1) {

?? ? ? ?cout << temp.value << "匹配成功" << endl;

?? ? ? ?return;

?? ?}

?? ?cout << temp.value << " ? ?匹配成功" << endl;

?? ?cout << temp.value << " ? ?进入语义栈" << endl;

?? ?sematicStack.push(temp.value);

}

//产生四元式并添加到四元式缓冲区中

void produceExpression(char temp) {

?? ?expressions *eTemp = new expressions;

?? ?eTemp->num2 = sematicStack.top();

?? ?sematicStack.pop();

?? ?eTemp->num1 = sematicStack.top();

?? ?sematicStack.pop();

?? ?eTemp->symbol = temp;

?? ?eTemp->result = (char) ((int) 'u' + expressionCount);

?? ?sematicStack.push(eTemp->result);

?? ?expressionBuffer.push_back(*eTemp);

?? ?expressionCount++;

}

//输出四元式

void printExpression() {

?? ?for (vector<expressions>::iterator i = expressionBuffer.begin(); i != expressionBuffer.end(); i++) {

?? ? ? ?cout << "四元式:" << i->symbol << " " << (char) (i->num1) << " " << (char) (i->num2) << " " << i->result << endl;

?? ?}

}

//输出读取到的token

void printTokens() {

?? ?for (vector<tokens>::iterator i = tokenBuffer.begin(); i != tokenBuffer.end(); i++) {

?? ? ? ?cout << "token：" << i->code << " " << i->value << endl;

?? ?}

}

//查找分析表将相应产生式压入分析栈

bool searchAnalysiTable(char cState, tokens cToken) {

?? ?formulas *fTemp = new formula;

?? ?//查分析表，获取对应的产生式编号

?? ?int e_num;

?? ?vector<analysisTables>::iterator i;

?? ?for (i = analysisTableBuffer.begin(); i != analysisTableBuffer.end(); i++) {

?? ? ? ?if (i->currentState == cState && i->currentToken == cToken.value) {

?? ? ? ? ? ?e_num = i->expressionNum;

?? ? ? ? ? ?break;

?? ? ? ?}

?? ?}

?? ?if (i == analysisTableBuffer.end()) {

?? ? ? ?cout << "parse error" << endl;

?? ? ? ?return false;

?? ?}

?? ?cout << "产生式编号：" << e_num << endl;

?? ?//查找产生式缓冲区获得对应产生式

?? ?fTemp = &formulaBuffer[e_num - 1];

?? ?cout << analysisStack.top() << "出栈" << endl;

?? ?analysisStack.pop();

?? ?//将产生式右部逆序压栈

?? ?if (e_num == 9)

?? ? ? ?fTemp->right[0] = cToken.value;

?? ?int j = fTemp->r_length;

?? ?for (int i = 0; i < fTemp->r_length; i++) {

?? ? ? ?cout << fTemp->right[j - 1] << "进入分析栈" << endl;

?? ? ? ?analysisStack.push(fTemp->right[j - 1]);

?? ? ? ?j--;

?? ?}

?? ?return true;

}

int main(int argc, char *argv[]) {

?? ?initialAnalysisTableBuffer();

?? ?initialTokenBuffer();

?? ?initialFormulaBuffer();

?? ?printTokens();

?? ?analysisStack.push('#');

?? ?analysisStack.push('E');

?? ?bool b_temp;

?? ?while (!analysisStack.empty()) {

?? ? ? ?tokens currentToken = tokenBuffers.top();

?? ? ? ?if (currentToken.value == analysisStack.top()) {

?? ? ? ? ? ?accept(currentToken);

?? ? ? ? ? ?tokenBuffers.pop();

?? ? ? ? ? ?cout << analysisStack.top() << "出栈" << endl;

?? ? ? ? ? ?analysisStack.pop();

?? ? ? ? ? ?continue;

?? ? ? ?}

?? ? ? ?cout << "current state:" << analysisStack.top() << endl;

?? ? ? ?cout << "current token:" << currentToken.value << endl;

?? ? ? ?switch (analysisStack.top()) {

?? ? ? ? ? ?case 'Y':

?? ? ? ? ? ? ? ?produceExpression('+');