OTL終於能寫出點實用的東西了。今天寫的一個MIPS彙編器,能讀入若干行MIPS指令,將其翻譯成機器碼,存入一個文本文件中。目前只能編譯add, sub, and, or, slt, beq, j, lw, sw, nop這幾種指令,並且不能識別註釋。不過對於完成計算機組成實驗來說足夠了。
// A simple and naive MIPS assembler
// Use this command to make: g++ MIPS.cpp -o MIPS
// Usage: ./MIPS CodeFile
#include<iostream>
#include<stdio.h>
#include<memory.h>
#include<cstring>
#include<fstream>
#include<iomanip>
#include<cstdlib>
using namespace std;
// A function used to extract the register number from string "$xx"
int getReg(char* arg){
int reg = 0;
char *p = arg;
while (*p == ' ') p++; // Skip the space at the beginning
if (*p == '$') p++;
else return -1; // Unexpected charaters
// Parse
while(*p >= '0' && *p <= '9'){
reg = 10 * reg + (*p - '0');
p++;
}
while (*p == ' ') p++; // Skip the space following the number
if (*p != '\0') return -1; // Unexpected characters
else return reg;
}
// Output a binary number with a given length to an object of ostream
void printBin(int num, int digi, ostream& os){
char *bin = new char[digi];
memset(bin, '0', digi * sizeof(char));
char *p = bin + (strlen(bin) - 1);
int sign = (num >= 0)? 1: -1;
num = num * sign;
while (num > 0){
if (num & 1) *p = '1';
num >>= 1;
p--;
}
// Transform to the 2's complement
if (sign == -1){
p = bin + (strlen(bin) - 1);
while (p >= bin && *p != '1') p--; // Find the last '1'
if (p >= bin && *p == '1') p--; // Skip the last '1'
while (p >= bin) {
*p = ('0' + '1') - *p;
p--;
}
}
os << bin;
delete bin;
}
int main(int argc, char** argv){
// Check the input
if (argc != 2){
cout << "Error: File name needed." << endl;
exit(-1);
}
ifstream code(argv[1]);
char *objname = new char[strlen(argv[1]) + 10];
strcpy(objname, argv[1]);
int i;
// Find the suffix
for (i = strlen(objname) - 1; i >= 0; i--) if (objname[i] == '.') break;
// Give the output file a ".mips" suffix
if (objname[i] == '.'){
objname[i + 1] = '\0';
strcat(objname, "mips");
}
else strcat(objname, ".mips");
ofstream obj(objname);
delete objname;
/****************************************************
The instruction strctures
R-type:
+------------------------------------------------+
| opcode | rs | rt | rd | shamt | funct |
|--------+-------+-------+-------+-------+-------|
| 6-bit | 5-bit | 5-bit | 5-bit | 5-bit | 6-bit |
+------------------------------------------------+
I-type:
+------------------------------------------------+
| opcode | rs | rt | immediate |
|--------+-------+-------+-----------------------|
| 6-bit | 5-bit | 5-bit | 16-bit |
+------------------------------------------------+
J-type:
+------------------------------------------------+
| opcode | jump address |
|--------+---------------------------------------|
| 6-bit | 26-bit |
+------------------------------------------------+
*/
char command[20], arg1[20], arg2[20], arg3[20];
int line_no = 0;
while (code >> command){
line_no++; // The current line
//code >> command;
if (!strcmp(command, "add")){ // ADD $rd, $rs, $rt
printBin(0, 6, obj); // OpCode
//code >> arg1 >> arg2 >> arg3;
code.getline(arg1, 20, ',');
int rd = getReg(arg1);
if (rd == -1){
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
code.getline(arg2, 20, ',');
int rs = getReg(arg2);
if (rs == -1){
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
code.getline(arg3, 20);
int rt = getReg(arg3);
if (rt == -1){
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
printBin(rs, 5, obj);
printBin(rt, 5, obj);
printBin(rd, 5, obj);
printBin(0, 5, obj); // shamt
printBin(0x20, 6, obj); // funct
obj << endl;
}
else if (!strcmp(command, "sub")){ // SUB $rd, $rs, $rt
printBin(0, 6, obj); // OpCode
code.getline(arg1, 20, ',');
int rd = getReg(arg1);
if (rd == -1){
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
code.getline(arg2, 20, ',');
int rs = getReg(arg2);
if (rs == -1){
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
code.getline(arg3, 20);
int rt = getReg(arg3);
if (rt == -1){
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
printBin(rs, 5, obj);
printBin(rt, 5, obj);
printBin(rd, 5, obj);
printBin(0, 5, obj); // shamt
printBin(0x22, 6, obj); // funct
obj << endl;
}
else if (!strcmp(command, "and")){ // AND $rd, $rs, $rt
printBin(0, 6, obj); // OpCode
code.getline(arg1, 20, ',');
int rd = getReg(arg1);
if (rd == -1){
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
code.getline(arg2, 20, ',');
int rs = getReg(arg2);
if (rs == -1){
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
code.getline(arg3, 20);
int rt = getReg(arg3);
if (rt == -1){
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
printBin(rs, 5, obj);
printBin(rt, 5, obj);
printBin(rd, 5, obj);
printBin(0, 5, obj); // shamt
printBin(0x24, 6, obj); // funct
obj << endl;
}
else if (!strcmp(command, "or")){// OR $rd, $rs, $rt
printBin(0, 6, obj); // OpCode
code.getline(arg1, 20, ',');
int rd = getReg(arg1);
if (rd == -1){
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
code.getline(arg2, 20, ',');
int rs = getReg(arg2);
if (rs == -1){
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
code.getline(arg3, 20);
int rt = getReg(arg3);
if (rt == -1){
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
printBin(rs, 5, obj);
printBin(rt, 5, obj);
printBin(rd, 5, obj);
printBin(0, 5, obj); // shamt
printBin(0x25, 6, obj); // funct
obj << endl;
}
else if (!strcmp(command, "slt")){// SLT $rd, $rs, $rt
printBin(0, 6, obj); // OpCode
code.getline(arg1, 20, ',');
int rd = getReg(arg1);
if (rd == -1){
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
code.getline(arg2, 20, ',');
int rs = getReg(arg2);
if (rs == -1){
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
code.getline(arg3, 20);
int rt = getReg(arg3);
if (rt == -1){
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
printBin(rs, 5, obj);
printBin(rt, 5, obj);
printBin(rd, 5, obj);
printBin(0, 5, obj); // shamt
printBin(0x2A, 6, obj); // funct
obj << endl;
}
else if (!strcmp(command, "beq")){ // BEQ $rs, $rt, imm
printBin(0x4, 6, obj); // OpCode
code.getline(arg1, 20, ',');
int rs = getReg(arg1);
if (rs == -1){
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
code.getline(arg2, 20, ',');
int rt = getReg(arg2);
if (rt == -1){
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
int offset;
code >> offset;
printBin(rs, 5, obj);
printBin(rt, 5, obj);
printBin(offset, 16, obj); // immediate
obj << endl;
}
else if (!strcmp(command, "lw")){ // LW $rt, offset($rs)
printBin(0x23, 6, obj); // OpCode
code.getline(arg1, 20, ',');
int rt = getReg(arg1);
if (rt == -1){
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
code.getline(arg2, 20);
int offset = 0;
int sign = 1;
char *p = arg2;
while (*p == ' ') p++;
if (*p == '-'){
p++;
sign = -1;
}
// Parse
while (*p >= '0' && *p <= '9'){
offset = 10 * offset + (*p - '0');
p++;
}
offset = sign * offset;
while (*p == ' ') p++; // Skip the space before '('
if (*p == '(') p++;
else {
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
char *q = arg2 + (strlen(arg2) - 1);
while (*q == ' ') q--;
if (*q == ')') *q = '\0'; // Find the ')' and delete it
else { // No ')' found, syntax error occurred
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
int rs = getReg(p);
if (rs == -1){
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
printBin(rs, 5, obj);
printBin(rt, 5, obj);
printBin(offset, 16, obj);
obj << endl;
}
else if (!strcmp(command, "sw")){ // SW $rt, offset($rs)
printBin(0x2B, 6, obj); // OpCode
code.getline(arg1, 20, ',');
int rt = getReg(arg1);
if (rt == -1){
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
code.getline(arg2, 20);
int offset = 0;
int sign = 1;
char *p = arg2;
while (*p == ' ') p++;
if (*p == '-'){
p++;
sign = -1;
}
// Parse
while (*p >= '0' && *p <= '9'){
offset = 10 * offset + (*p - '0');
p++;
}
offset = sign * offset;
while (*p == ' ') p++; // Skip the space before '('
if (*p == '(') p++;
else {
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
char *q = arg2 + (strlen(arg2) - 1);
while (*q == ' ') q--;
if (*q == ')') *q = '\0'; // Find the ')' and delete it
else { // No ')' found, syntax error occurred
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
int rs = getReg(p);
if (rs == -1){
cout << "Syntax error at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
printBin(rs, 5, obj);
printBin(rt, 5, obj);
printBin(offset, 16, obj);
obj << endl;
}
else if (!strcmp(command, "j")){ // J addr
printBin(0x02, 6, obj); // OpCode
int addr;
code >> addr;
if (addr < 0){ // Negative address is not allowed
cout << "Invalid address at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
printBin(addr, 26, obj);
obj << endl;
}
else if (!strcmp(command, "nop")){ // NOP
printBin(0, 32, obj);
obj << endl;
}
else { // No other instructions allowed
cout << "Invalid instruction at Line " << line_no << "." << endl;
code.close();
obj.close();
exit(-1);
}
}
code.close();
obj.close();
return 0;
}