


// C++ code
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>

using namespace std;

vector<string> read_asm_file(const string &filename);
// Writes each entry of `output` to the file `filename`.
void write_output_file(const string &filename, const vector<string> &output);
// Translates the pretreated source lines into machine-code strings.
vector<string> Assemble(const vector<string>& lines);

int npower_16(int n);//return 16^n
int Hex2Num(char ch);//convert a character to the number it represents as a hex digit
int atoi_hex(string strInValue);//convert a hex string to its value
int s_find(string str, char c, int time);//return the position of the time'th character 'c' in the string str
string Pretreat(string line);//delete the label and construct the symbol_address table
string dec_to_2com(int n, int digit);//convert a decimal number n to its "digit"-digit 2's complement
string get_first_word(string str);//return the first word (divided by space) in the sentence

//translate each instruction from an LC-3 code line string to its machine code string
string translate_instruction(const string &instruction);
string trans_ADD(string instruction);
string trans_AND(string instruction);
string trans_NOT(string instruction);
string trans_BR(string instruction);
string trans_JMP(string instruction);
string trans_JSR(string instruction);
string trans_JSRR(string instruction);
string trans_LD(string instruction);
string trans_LDI(string instruction);
string trans_LDR(string instruction);
string trans_LEA(string instruction);
string trans_RET(string instruction);
string trans_RTI(string instruction);
string trans_ST(string instruction);
string trans_STI(string instruction);
string trans_STR(string instruction);
string trans_TRAP(string instruction);
string trans_FILL(string instruction);
string trans_BLKW(string instruction);
string trans_STRINGZ(string instruction);

int start_address;//the address after .ORIG
int current_address;//record the address of the current instruction
map<string, int> SA;//symbol_address table (label -> address)
// Main function
int main(int argc, char *argv[]){if (argc != 3){cerr << "Usage: " << argv[0] << " <input_file.asm> <output_file.txt>" << endl;return 1;}string input_filename = argv[1];string output_filename = argv[2];vector<string> input_lines = read_asm_file(input_filename);Pretreat(input_lines[0]);//find the start addressvector<string> output_lines;output_lines.push_back(dec_to_2com(start_address,16));vector<string> pretreated_lines;// use the iterator to skip the first lineauto iter = input_lines.begin();++iter; // skip the first linefor (; iter != input_lines.end(); ++iter) {string& line = *iter;string pretreated_line = Pretreat(line);if(pretreated_line!="SKIP")pretreated_lines.push_back(pretreated_line);elsebreak;}auto assembled_lines = Assemble(pretreated_lines);output_lines.insert(output_lines.end(), assembled_lines.begin(), assembled_lines.end());write_output_file(output_filename, output_lines);return 0;
}
// File reading and writing
/// Reads a text file line by line.
///
/// @param filename path of the file to read.
/// @return the lines of the file in order (without line terminators); an empty
///         vector if the file cannot be opened, in which case a message is
///         printed to stderr.
std::vector<std::string> read_asm_file(const std::string &filename) {
    std::vector<std::string> lines;
    std::ifstream file(filename);
    if (!file.is_open()) {
        std::cerr << "Unable to open file: " << filename << std::endl;
        return lines;
    }
    for (std::string line; std::getline(file, line);) {
        lines.push_back(line);
    }
    return lines;  // the stream closes itself when it goes out of scope
}

/// Writes the assembled output to a text file.
///
/// Only the characters '0', '1' and '\n' of each entry are written; anything
/// else (padding bytes, stray characters) is dropped. A newline is written
/// after every entry.
///
/// @param filename path of the file to create or overwrite.
/// @param output   entries to write, each holding one or more binary words.
void write_output_file(const std::string &filename, const std::vector<std::string> &output) {
    std::ofstream file(filename);
    if (!file.is_open()) {
        std::cerr << "Unable to open file: " << filename << std::endl;
        return;
    }
    for (const std::string &entry : output) {
        std::string filtered;
        for (const char ch : entry) {
            if (ch == '\n' || ch == '0' || ch == '1') {
                filtered += ch;
            }
        }
        file << filtered << '\n';
    }
}
// Assembly
vector<string> Assemble(const vector<string>& lines) {vector<string> mach_codes;current_address = start_address - 1;for (const string& line : lines) {string mach_code = "";mach_code.resize(10000);mach_code[0] = '\0';string fw = get_first_word(line);if (fw == ".BLKW"){int d = s_find(line, '#', 1);int imm16 = atoi(line.substr(d + 1).c_str());current_address += imm16;}else if (fw == ".STRINGZ"){int d1 = s_find(line, '"', 1) + 1;int d2 = s_find(line, '"', 2) - 1;current_address += d2 - d1 + 2;}elsecurrent_address++;string mach_line;//this string is what we finally want to getmach_line.resize(18);mach_line=translate_instruction(line);//translationmach_code.append(mach_line);mach_codes.push_back(mach_code);}return mach_codes;
}
// Preprocessing
//Label Deleting and Symbol_Adrress table construction
string Pretreat(string line){string output_line;output_line.resize(300);int temp_address=-1,j = 0;//notice that .BLKW and .STRINGZ occupies may not only one location in memory.string fw = get_first_word(line);if (fw == ".ORIG"){start_address = atoi_hex(line.substr(s_find(line, 'x', 1)+1).c_str());current_address = start_address-1;}else if (fw == ".END")return "SKIP";else if (fw == ".BLKW" || get_first_word(line.substr(fw.length() + 1, line.length() - fw.length())) == ".BLKW"){int d = s_find(line, '#', 1);int imm16 = atoi(line.substr(d + 1).c_str());temp_address = current_address + 1;current_address += imm16;}else if (fw == ".STRINGZ"||get_first_word(line.substr(fw.length()+1,line.length()-fw.length()))==".STRINGZ"){int d1 = s_find(line, '"', 1) + 1;int d2 = s_find(line, '"', 2) - 1;temp_address = current_address + 1;current_address += d2 - d1+2;}else{current_address++;temp_address = current_address;}//If the first word is not the instruction set, it is listed in symbol_ Address table and deleted in the codeif (fw != ".ORIG" && fw != "ADD" && fw != "AND" && fw != "NOT" && fw != "LD" && fw != "LDR" && fw != "LDI" && fw != "ST" &&fw != "STR" && fw != "STI" && fw != "TRAP" && fw != "LEA" && fw != "RTI" && fw != "JMP" && fw != "JSR" &&fw != "RET" && fw != "JSRR" && fw != ".FILL" && fw != ".STRINGZ" && fw != ".BLKW" && fw != "BR" && fw != "BRN" &&fw != "BRZ" && fw != "BRP" && fw != "BRNZ" && fw != "BRNP" && fw != "BRZP" && fw != "BRNZP" && fw != "TRAP" &&fw != ".END" && fw[0] != '\0'){SA.insert(make_pair(fw, temp_address));line.erase(s_find(line, fw[0], 1), fw.length());line.erase(s_find(line, ' ', 1), 1);}return line;
}
// Translating assembly instructions into machine code
string translate_instruction(const string &instruction){string machine_code,fw;fw = get_first_word(instruction);if (fw == "ADD")return trans_ADD(instruction);else if (fw == "AND")return trans_AND(instruction);else if (fw == "NOT")return trans_NOT(instruction);else if (fw == "LD")return trans_LD(instruction);else if (fw == "LDR")return trans_LDR(instruction);else if (fw == "LDI")return trans_LDI(instruction);else if (fw == "LEA")return trans_LEA(instruction);else if (fw == "ST")return trans_ST(instruction);else if (fw == "STR")return trans_STR(instruction);else if (fw == "STI")return trans_STI(instruction);else if (fw == "JMP")return trans_JMP(instruction);else if (fw == "JSRR")return trans_JSRR(instruction);else if (fw == "JSR")return trans_JSR(instruction);else if (fw == "RET")return trans_RET(instruction);else if (fw == "RTI")return trans_RTI(instruction);else if (fw == "BR" || fw == "BRN" || fw == "BRZ" || fw == "BRP" ||fw== "BRNZ" || fw == "BRNP" || fw == "BRZP" || fw == "BRNZP")return trans_BR(instruction);else if (fw == "TRAP" )return trans_TRAP(instruction);else if (fw == ".FILL")return trans_FILL(instruction);else if (fw == ".BLKW")return trans_BLKW(instruction);else if (fw == ".STRINGZ")return trans_STRINGZ(instruction);else if(fw==".END")return "";return "";
}//translate each instrutions(from a lc-3 code line string to it's machine code string(end with \n))
//mainly by handlding the operation on STRING
string trans_ADD(string instruction){string code;code.resize(18);code = "0001";int d1 = s_find(instruction, 'R', 1) + 1;int DR = instruction[d1] - '0';code.append(dec_to_2com(DR, 3));int d2 = s_find(instruction, 'R', 2) + 1;int SR1 = instruction[d2] - '0';code.append(dec_to_2com(SR1, 3));int d3 = s_find(instruction, '#', 1);if (d3 != -1){code.append("1");int imm5 = atoi(instruction.substr(d3 + 1).c_str());code.append(dec_to_2com(imm5, 5));}int d4 = s_find(instruction, 'x', 1);if (d4 != -1){code.append("1");int imm5 = atoi_hex(instruction.substr(d4 + 1).c_str());code.append(dec_to_2com(imm5, 5));}else if (d3 == -1 && d4 == -1){code.append("000");d3 = s_find(instruction, 'R', 3) + 1;int SR3 = instruction[d3] - '0';code.append(dec_to_2com(SR3, 3));}return code;
}string trans_AND(string instruction){string code;code.resize(18);code = "0101";int d1 = s_find(instruction, 'R', 1) + 1;int DR = instruction[d1] - '0';code.append(dec_to_2com(DR, 3));int d2 = s_find(instruction, 'R', 2) + 1;int SR1 = instruction[d2] - '0';code.append(dec_to_2com(SR1, 3));int d3 = s_find(instruction, '#', 1);if (d3 != -1){code.append("1");int imm5 = atoi(instruction.substr(d3 + 1).c_str());code.append(dec_to_2com(imm5, 5));}int d4 = s_find(instruction, 'x', 1);if (d4 != -1){code.append("1");int imm5 = atoi_hex(instruction.substr(d4 + 1).c_str());code.append(dec_to_2com(imm5, 5));}else if (d3 == -1 && d4 == -1){code.append("000");d3 = s_find(instruction, 'R', 3) + 1;int SR3 = instruction[d3] - '0';code.append(dec_to_2com(SR3, 3));}return code;
}string trans_NOT(string instruction){string code;code.resize(18);code = "1001";int d1 = s_find(instruction, 'R', 1) + 1;int DR = instruction[d1] - '0';code.append(dec_to_2com(DR, 3));int d2 = s_find(instruction, 'R', 2) + 1;int SR = instruction[d2] - '0';code.append(dec_to_2com(SR, 3));code.append("111111");return code;
}string trans_LD(string instruction){string code;code.resize(18);code = "0010";int d1 = s_find(instruction, 'R', 1) + 1;int DR = instruction[d1] - '0';code.append(dec_to_2com(DR, 3));int d2 = s_find(instruction, '#', 1);int off9;if (d2 != -1)off9 = atoi(instruction.substr(d2 + 1).c_str());else{int d3 = s_find(instruction, ',', 1) + 1;string last = instruction.substr(d3, instruction.length() - d3);string label = get_first_word(last);int address = 3000;if (SA.find(label) != SA.end())address = SA.find(label)->second;off9 = address - current_address-1;}code.append(dec_to_2com(off9, 9));return code;
}string trans_LDR(string instruction){string code;code.resize(18);code = "0110";int d1 = s_find(instruction, 'R', 2) + 1;int DR = instruction[d1] - '0';code.append(dec_to_2com(DR, 3));int d2 = s_find(instruction, 'R', 3) + 1;int BR = instruction[d2] - '0';code.append(dec_to_2com(BR, 3));int d3 = s_find(instruction, '#', 1);if (d3 != -1){int imm6 = atoi(instruction.substr(d3 + 1).c_str());code.append(dec_to_2com(imm6, 6));}int d4 = s_find(instruction, 'x', 1);if (d4 != -1){int imm6 = atoi_hex(instruction.substr(d4 + 1).c_str());code.append(dec_to_2com(imm6, 6));}return code;
}string trans_LDI(string instruction){string code;code.resize(18);code = "1010";int d1 = s_find(instruction, 'R', 1) + 1;int DR = instruction[d1] - '0';code.append(dec_to_2com(DR, 3));int d2 = s_find(instruction, '#', 1);int off9;if (d2 != -1)off9 = atoi(instruction.substr(d2 + 1).c_str());else{int d3 = s_find(instruction, ',', 1) + 1;string last = instruction.substr(d3, instruction.length() - d3);string label = get_first_word(last);int address = 3000;if (SA.find(label) != SA.end())address = SA.find(label)->second;off9 = address - current_address-1;}code.append(dec_to_2com(off9, 9));return code;
}string trans_LEA(string instruction){string code;code.resize(18);code = "1110";int d1 = s_find(instruction, 'R', 1) + 1;int DR = instruction[d1] - '0';code.append(dec_to_2com(DR, 3));int d2 = s_find(instruction, '#', 1);int off9;if (d2 != -1)off9 = atoi(instruction.substr(d2 + 1).c_str());else{int d3 = s_find(instruction, ',', 1) + 1;string last = instruction.substr(d3, instruction.length() - d3);string label = get_first_word(last);int address = 3000;if (SA.find(label) != SA.end())address = SA.find(label)->second;off9 = address - current_address-1;}code.append(dec_to_2com(off9, 9));return code;
}string trans_ST(string instruction){string code;code.resize(18);code = "0011";int d1 = s_find(instruction, 'R', 1) + 1;int DR = instruction[d1] - '0';code.append(dec_to_2com(DR, 3));int d2 = s_find(instruction, '#', 1);int off9;if (d2 != -1)off9 = atoi(instruction.substr(d2 + 1).c_str());else{int d3 = s_find(instruction, ',', 1) + 1;string last = instruction.substr(d3, instruction.length() - d3);string label = get_first_word(last);int address = 3000;if (SA.find(label) != SA.end())address = SA.find(label)->second;off9 = address - current_address-1;}code.append(dec_to_2com(off9, 9));return code;
}string trans_STR(string instruction){string code;code.resize(18);code = "0111";int d1 = s_find(instruction, 'R', 2) + 1;int DR = instruction[d1] - '0';code.append(dec_to_2com(DR, 3));int d2 = s_find(instruction, 'R', 3) + 1;int BR = instruction[d2] - '0'; code.append(dec_to_2com(BR, 3));int d3 = s_find(instruction, '#', 1);if (d3 != -1){int imm6 = atoi(instruction.substr(d3 + 1).c_str());code.append(dec_to_2com(imm6, 6));}int d4 = s_find(instruction, 'x', 1);if (d4 != -1){int imm6 = atoi_hex(instruction.substr(d4 + 1).c_str());code.append(dec_to_2com(imm6, 6));}return code;
}string trans_STI(string instruction){string code;code.resize(18);code = "1011";int d1 = s_find(instruction, 'R', 1) + 1;int DR = instruction[d1] - '0';code.append(dec_to_2com(DR, 3));int d2 = s_find(instruction, '#', 1);int off9;if (d2 != -1)off9 = atoi(instruction.substr(d2 + 1).c_str());else{int d3 = s_find(instruction, ',', 1) + 1;string last = instruction.substr(d3, instruction.length() - d3);string label = get_first_word(last);int address = 3000;if (SA.find(label) != SA.end())address = SA.find(label)->second;off9 = address - current_address-1;}code.append(dec_to_2com(off9, 9));return code;
}string trans_JMP(string instruction){string code;code.resize(18);code = "1100";code.append("000");int d1 = s_find(instruction, 'R', 1) + 1;int DR = instruction[d1] - '0';code.append(dec_to_2com(DR, 3));code.append("000000");return code;
}string trans_JSRR(string instruction){string code;code.resize(18);code = "0100";code.append("000");int d1 = s_find(instruction, 'R', 3) + 1;int DR = instruction[d1] - '0';code.append(dec_to_2com(DR, 3));code.append("000000");return code;
}string trans_JSR(string instruction){string code;code.resize(18);code = "01001";int off11;int d = s_find(instruction, 'R', 1) + 1;string last = instruction.substr(d, instruction.length() - d);string label = get_first_word(last);int address = 3000;if (SA.find(label) != SA.end())address = SA.find(label)->second;off11 = address - current_address-1;code.append(dec_to_2com(off11, 11));return code;
}string trans_RTI(string instruction){string code= "1000000000000000";return code;
}string trans_RET(string instruction){string code= "1100000111000000";return code;
}string trans_BR(string instruction){string code;code.resize(18);code = "0000";int NZP = 0;string br_part = get_first_word(instruction);if (s_find(br_part, 'N', 1) != -1)NZP += 4;if (s_find(br_part, 'Z', 1) != -1)NZP += 2;if (s_find(br_part, 'P', 1) != -1)NZP += 1;if (s_find(br_part, 'N', 1) == -1 && s_find(br_part, 'Z', 1) == -1 && s_find(br_part, 'P', 1) == -1)NZP = 7;code.append(dec_to_2com(NZP, 3));int d2 = s_find(instruction, '#', 1);int off9;if (d2 != -1)off9 = atoi(instruction.substr(d2 + 1).c_str());else{int d3 = max(max(s_find(br_part, 'R', 1) + 1, s_find(br_part, 'N', 1) + 1), max(s_find(br_part, 'Z', 1) + 1, s_find(br_part, 'P', 1) + 1)) + s_find(instruction, 'B', 1);string last = instruction.substr(d3, instruction.length() - d3);string label = get_first_word(last);int address = 3000;if (SA.find(label) != SA.end())address = SA.find(label)->second;off9 = address - current_address-1;}code.append(dec_to_2com(off9, 9));return code;
}string trans_TRAP(string instruction){string code;code.resize(18);code = "11110000";string fw = get_first_word(instruction);string trap_vector8 = "00000000";//written in "TRAP vector8" formulaint d = s_find(instruction, 'x', 1) + 1;trap_vector8 = dec_to_2com(atoi_hex(instruction.substr(d, instruction.length() - d).c_str()), 8);code.append(trap_vector8);return code;
}string trans_FILL(string instruction){string code = "";code.resize(18);int imm16 = 0;int d = s_find(instruction, '#', 1);if (d != -1)imm16 = atoi(instruction.substr(d + 1, instruction.length()-d).c_str());else if (s_find(instruction, 'x', 1) != -1){d = s_find(instruction, 'x', 1);imm16 = atoi_hex(instruction.substr(d + 1, instruction.length()-d).c_str());}elsereturn NULL;code.append(dec_to_2com(imm16, 16));return code;
}string trans_BLKW(string instruction){string line1 = "",line2="";line1.resize(18);line2.resize(18);line1 = "0000000000000000\n";line2 = "0000000000000000";int d = s_find(instruction, '#', 1);int imm16 = atoi(instruction.substr(d + 1, instruction.length()-d).c_str());string codes = "";codes.resize(d * 17 + 1);for (int i = 1; i < imm16; i++)codes.append(line1);codes.append(line2);return codes;
}string trans_STRINGZ(string instruction){string codes = "";int d1 = s_find(instruction, '"', 1) + 1;int d2 = s_find(instruction, '"', 2) - 1;if (d1 > d2){codes.resize(18);codes.append("0000000000000000");return codes;}string str = instruction.substr(d1, d2 - d1 + 1);codes.resize(str.length() * 17 + 1);string line = "";line.resize(18);int i = 0;while (str[i]){line = "";line.append(dec_to_2com((int)str[i], 16));line.append("\n");codes.append(line);i++;}line = "";line.append("0000000000000000");codes.append(line);return codes;
}
// Other helper functions
// Return the first word (delimited by spaces) of the given text.
// Leading spaces are skipped; the word ends at the next space, at an embedded
// NUL character, or at the end of the string. Returns "" if there is no word.
std::string get_first_word(std::string str) {
    const std::size_t begin = str.find_first_not_of(' ');
    if (begin == std::string::npos) {
        return "";
    }
    std::size_t end = begin;
    while (end < str.size() && str[end] != ' ' && str[end] != '\0') {
        ++end;
    }
    return str.substr(begin, end - begin);
}

// Return 16^n; any n below 1 yields 1.
// Results too large for int keep only their low-order bits, which are all
// zero from 16^8 upwards.
int npower_16(int n) {
    if (n < 1) {
        return 1;
    }
    if (n >= 16) {
        return 0;  // 16^16 has no set bit left in 64 bits
    }
    return static_cast<int>(1ULL << (4 * n));
}
// Convert a character to the number it represents as a hexadecimal digit
int Hex2Num(char ch) {
    // Decimal digits map to 0..9.
    if ('0' <= ch && ch <= '9') {
        return ch - '0';
    }
    // Letters of either case map to 10..15.
    if ('A' <= ch && ch <= 'F') {
        return 10 + (ch - 'A');
    }
    if ('a' <= ch && ch <= 'f') {
        return 10 + (ch - 'a');
    }
    // Anything that is not a hex digit counts as 0.
    return 0;
}
// Translate a hex string to its value
// Parses the hexadecimal number at the start of the text.
// Leading spaces are skipped, an optional '-' makes the result negative, and
// parsing stops at the first character that is not a hex digit (so trailing
// text such as "\r" or ";comment" no longer shifts the value, and long tokens
// no longer overflow a fixed-size buffer). Returns 0 if no digit is found.
int atoi_hex(std::string strInValue) {
    const std::size_t length = strInValue.size();
    std::size_t pos = 0;
    while (pos < length && strInValue[pos] == ' ') {
        ++pos;
    }

    int sign = 1;
    if (pos < length && strInValue[pos] == '-') {
        sign = -1;  // negative
        ++pos;
    }

    // Digits are classified locally because an invalid character must be told
    // apart from the digit '0'. Unsigned arithmetic avoids signed overflow.
    unsigned int magnitude = 0;
    for (; pos < length; ++pos) {
        const char ch = strInValue[pos];
        unsigned int digit = 0;
        if (ch >= '0' && ch <= '9') {
            digit = static_cast<unsigned int>(ch - '0');
        } else if (ch >= 'A' && ch <= 'F') {
            digit = static_cast<unsigned int>(ch - 'A') + 10u;
        } else if (ch >= 'a' && ch <= 'f') {
            digit = static_cast<unsigned int>(ch - 'a') + 10u;
        } else {
            break;
        }
        magnitude = magnitude * 16u + digit;
    }
    return sign * static_cast<int>(magnitude);
}
// Return the position of the time'th occurrence of character c in str (-1 if there is none)
int s_find(std::string str, char c, int time) {
    const int length = static_cast<int>(str.size());
    int remaining = time;
    int index = 0;
    // Scanning stops at the end of the string or at an embedded NUL.
    while (index < length && str[index] != '\0') {
        if (str[index] == c) {
            --remaining;
            if (remaining <= 0) {
                return index;
            }
        }
        ++index;
    }
    return -1;  // fewer than `time` occurrences
}
// Convert a decimal number n to its "digit"-digit 2's complement
// Returns the low `digit` bits of the two's complement representation of n,
// most significant bit first, as a string of '0'/'1' characters.
// A value that needs more than `digit` bits is truncated to its low-order
// bits (the former code constructed a std::string from a null pointer in that
// case, which is undefined behaviour). A non-positive `digit` yields "".
std::string dec_to_2com(int n, int digit) {
    if (digit <= 0) {
        return std::string();
    }
    std::string bits(static_cast<std::size_t>(digit), '0');
    // Sign-extend to 64 bits so negative numbers fill wide fields with 1s.
    const unsigned long long pattern =
        static_cast<unsigned long long>(static_cast<long long>(n));
    for (int k = 0; k < digit; ++k) {
        const bool is_set = (k < 64) ? (((pattern >> k) & 1ULL) != 0ULL) : (n < 0);
        bits[static_cast<std::size_t>(digit - 1 - k)] = is_set ? '1' : '0';
    }
    return bits;
}