用AntlR4实现简单的汇编编译器
最近在學習《計算機系統要素——從零構建現代計算機》這本書,花了兩天時間用antlr4把書中第6章的匯編編譯器實現了.
輸出的機器指令和參考答案完全一致.
下面把antlr4的代碼貼出來,供后來者參考
/**
 * HackAsm: an assembler for the Hack assembly language, written as an ANTLR4
 * combined grammar with embedded Java actions.
 *
 * Parsing collects every A- and C-instruction into a list and records each
 * (LABEL) with the index of the instruction that follows it. When the start
 * rule "prog" finishes, symbolic A-instructions are resolved (labels and
 * predefined symbols first, then variables allocated from address 16 upward)
 * and the 16-bit encoding of every instruction is printed, one per line.
 */
grammar HackAsm;

@parser::header {
import java.util.Map;
import java.util.HashMap;
import java.util.List;
import java.util.LinkedList;
}

@parser::members {
// One assembled instruction.
static class Command {
    // Whether debug tracing is printed to standard output.
    static final boolean DEBUG = false;

    // The instruction encoded as a string of 16 binary digits.
    String code;
}

// An A-instruction. Exactly one of addr (numeric literal) or ID (symbol) is
// set by the parser; a symbolic addr is filled in once parsing has finished.
static class ACommand extends Command {
    // Largest value that fits in the 15-bit field of an A-instruction.
    static final int MAX_ADDR = 32767;

    Integer addr;
    String ID;

    // Encodes addr as a zero-padded 16-digit binary string into code.
    // Throws RuntimeException when addr is missing or does not fit in 15 bits,
    // instead of emitting a truncated (and therefore wrong) instruction.
    void update() {
        if (DEBUG) {
            System.out.println("addr=" + addr);
        }
        if (addr == null || addr < 0 || addr > MAX_ADDR) {
            throw new RuntimeException(
                "A-instruction value out of range (0.." + MAX_ADDR + "): " + addr);
        }
        StringBuilder bits = new StringBuilder(Integer.toBinaryString(addr));
        if (DEBUG) {
            System.out.println("str=" + bits);
        }
        // Pad with leading zeros up to the 16-bit instruction width.
        while (bits.length() < 16) {
            bits.insert(0, '0');
        }
        if (DEBUG) {
            System.out.println("str=" + bits);
        }
        code = bits.toString();
    }
}

// Returns the six ALU control bits of a comp mnemonic.
// The M forms share their control bits with the A forms (only the a-bit,
// computed by the caller, differs), so M is mapped to A before the lookup.
// Throws RuntimeException for a mnemonic the Hack ALU cannot compute.
static String compBits(String mnemonic) {
    String key = mnemonic.replace('M', 'A');
    switch (key) {
        case "D":   return "001100";
        case "A":   return "110000";
        case "!D":  return "001101";
        case "!A":  return "110001";
        case "-1":  return "111010";
        case "-D":  return "001111";
        case "-A":  return "110011";
        case "D+1": return "011111";
        case "A+1": return "110111";
        case "D-1": return "001110";
        case "A-1": return "110010";
        case "D+A": return "000010";
        case "D-A": return "010011";
        case "A-D": return "000111";
        case "D&A": return "000000";
        case "D|A": return "010101";
        default:
            throw new RuntimeException("unsupported comp expression: " + mnemonic);
    }
}
}

// Start rule: a sequence of lines, each holding at most one command.
// The last line may end at EOF without a trailing newline.
prog
locals [List<Command> cmds, Map<String,Integer> sym_table]
@init {
    $ctx.cmds = new LinkedList<Command>();
    $ctx.sym_table = new HashMap<String,Integer>();
    // Build the predefined symbol table.
    Map<String,Integer> predefined = $ctx.sym_table;
    predefined.put("SP", 0);
    predefined.put("LCL", 1);
    predefined.put("ARG", 2);
    predefined.put("THIS", 3);
    predefined.put("THAT", 4);
    predefined.put("SCREEN", 16384);
    predefined.put("KBD", 24576);
    for (int i = 0; i < 16; ++i) {
        predefined.put("R" + i, i);
    }
}
@after {
    if (Command.DEBUG) {
        System.out.println("after prog");
        System.out.println("processed " + $ctx.cmds.size() + " commands");
        System.out.println($ctx.sym_table.size() + " syms");
    }
    // Walk every A command and turn its symbol, if any, into a number.
    // All labels are already in the table at this point, so any symbol that
    // is still unknown is a variable and receives the next free address.
    Map<String,Integer> symbols = $ctx.sym_table;
    int nextVariable = 16;
    for (Command c : $ctx.cmds) {
        if (!(c instanceof ACommand)) {
            continue;
        }
        ACommand a = (ACommand) c;
        if (a.ID != null) {
            Integer resolved = symbols.get(a.ID);
            if (resolved == null) {
                resolved = nextVariable++;
                symbols.put(a.ID, resolved);
            }
            a.addr = resolved;
        }
        a.update();
    }
    for (Command c : $ctx.cmds) {
        System.out.println(c.code);
    }
}
    : (command? EL)* command? EOF
    ;

// One source line: an instruction is appended to the command list of prog,
// a label is bound to the index of the next instruction.
command
@init {
    ProgContext prog = (ProgContext) $ctx.getParent();
}
    : a_command
      {
        prog.cmds.add($a_command.cmd);
      }
    | c_command
      {
        prog.cmds.add($c_command.cmd);
      }
    | label
      {
        String name = $label.label_name;
        if (prog.sym_table.containsKey(name)) {
            throw new RuntimeException("label " + name + " has already used!");
        }
        prog.sym_table.put(name, prog.cmds.size());
      }
    ;

// (NAME) label declaration.
label returns [String label_name]
    : LP ID RP
      {
        $label_name = $ID.text;
        if (Command.DEBUG) {
            System.out.println("label=" + $label_name);
        }
      }
    ;

// @value or @symbol.
a_command returns [ACommand cmd]
@init {
    $cmd = new ACommand();
}
@after {
    if (Command.DEBUG) {
        System.out.println("ACommand: addr= " + $cmd.addr + ", ID=" + $cmd.ID);
    }
}
    // ZERO and ONE must be listed next to NUM: the lexer emits the texts 0 and 1
    // as ZERO/ONE tokens, never as NUM, so without them @0 and @1 fail to parse.
    : AT num=(NUM | ZERO | ONE)
      {
        $cmd.addr = $num.int;
        $cmd.ID = null;
      }
    | AT ID
      {
        $cmd.ID = $ID.text;
      }
    ;

// dest=comp;jump with optional dest and jump parts.
c_command returns [Command cmd]
locals [StringBuilder destcode, String compcode, String jmpcode]
@init {
    $cmd = new Command();
    $ctx.destcode = new StringBuilder("000");
    $ctx.jmpcode = "000";
    $ctx.compcode = "0000000";
}
@after {
    $cmd.code = "111" + $ctx.compcode + $ctx.destcode + $ctx.jmpcode;
    if (Command.DEBUG) {
        System.out.println($ctx.getText() + " is a c_command.");
        System.out.println("dest:" + $ctx.destcode);
        System.out.println("comp:" + $ctx.compcode);
        System.out.println("jmp:" + $ctx.jmpcode);
        System.out.println("code:" + $cmd.code);
    }
}
    // A multi-register dest such as MD or AMD is lexed as ID, a single register as DEST.
    : (dests=(ID | DEST) ASSIGN)? comp (SEMICOLON jmp)?
      {
        String dest = $dests.text;
        if (Command.DEBUG) {
            System.out.println("comp.dests.text" + dest);
        }
        if (dest != null) {
            // Reject arbitrary identifiers instead of silently encoding a wrong dest.
            if (!dest.matches("[AMD]+")) {
                throw new RuntimeException("invalid dest: " + dest);
            }
            if (dest.indexOf('A') != -1) {
                $ctx.destcode.setCharAt(0, '1');
            }
            if (dest.indexOf('D') != -1) {
                $ctx.destcode.setCharAt(1, '1');
            }
            if (dest.indexOf('M') != -1) {
                $ctx.destcode.setCharAt(2, '1');
            }
        }
        if (Command.DEBUG) {
            System.out.println("comp_part:" + $comp.text);
        }
        $ctx.compcode = $comp.afield + $comp.code;
        String jump = $jmp.text;
        if (jump != null) {
            $ctx.jmpcode = $jmp.code;
        }
      }
    ;

// The comp part: code holds the six control bits, afield the a-bit
// (1 when the expression reads M, 0 otherwise).
comp returns [String code, String afield]
@init {
    $code = "000000";
    $afield = "0";
}
@after {
    if (Command.DEBUG) {
        System.out.println("comp_code:" + $code);
    }
}
    : bin_expr
      {
        String expr = $bin_expr.text;
        if (expr.indexOf('M') != -1) {
            $afield = "1";
        }
        $code = compBits(expr);
      }
    | un_expr
      {
        String expr = $un_expr.text;
        if (expr.indexOf('M') != -1) {
            $afield = "1";
        }
        $code = compBits(expr);
      }
    | ZERO
      {
        $code = "101010";
      }
    | ONE
      {
        $code = "111111";
      }
    | DEST
      {
        String reg = $DEST.text;
        if (Command.DEBUG) {
            System.out.println("comp.dest.text:" + reg);
        }
        if (reg.equals("M")) {
            $afield = "1";
        }
        $code = compBits(reg);
      }
    ;

bin_expr : DEST bin_op (ONE | DEST) ;
bin_op   : PLUS | MINUS | BIT_AND | BIT_OR ;
un_expr  : neg_expr | not_expr ;
neg_expr : MINUS (ONE | DEST) ;
not_expr : BIT_NOT DEST ;

// Jump mnemonic and its three j-bits.
jmp returns [String code]
    : 'JGT' { $code = "001"; }
    | 'JEQ' { $code = "010"; }
    | 'JGE' { $code = "011"; }
    | 'JLT' { $code = "100"; }
    | 'JNE' { $code = "101"; }
    | 'JLE' { $code = "110"; }
    | 'JMP' { $code = "111"; }
    ;

BLOCK_COMMENT : '/*' .*? '*/' -> channel(HIDDEN) ;

// Filters line comments (the trick comes from section 12.1, page 181 of the
// Chinese edition of the ANTLR textbook); the terminating newline is kept so
// that it still produces an EL token.
SL_COMMENT : '//' ~[\r\n]* -> channel(HIDDEN) ;

// Rule order matters: DEST, ZERO and ONE precede ID and NUM so that the
// single characters A, M, D, 0 and 1 get their dedicated token types.
DEST : 'A' | 'M' | 'D' ;
ZERO : '0' ;
ONE  : '1' ;
// Hack symbols: letters, digits, underscore, period, dollar sign and colon,
// not starting with a digit.
ID   : [A-Za-z_$:.] [A-Za-z0-9_$:.]* ;
NUM  : [0-9] [0-9]* ;

PLUS      : '+' ;
MINUS     : '-' ;
BIT_AND   : '&' ;
BIT_OR    : '|' ;
BIT_NOT   : '!' ;
ASSIGN    : '=' ;
SEMICOLON : ';' ;
LP        : '(' ;
RP        : ')' ;
AT        : '@' ;

EL : '\n' ;
WS : [ \t\r]+ -> skip ; // skip spaces, tabs and carriage returns (newlines are EL tokens)

運行辦法:把上面的代碼保存到HackAsm.g4文件中,然后和
antlr-4.7.1-complete.jar放到一個目錄下,設置好java的環境變量之后,
進入到文件所在的命令行,執行下面的命令
java -cp .\antlr-4.7.1-complete.jar;%CLASSPATH% org.antlr.v4.Tool HackAsm.g4
javac -cp .\antlr-4.7.1-complete.jar;%CLASSPATH% *.java
如果沒有錯誤的話,把書中第6章的所有測試代碼文件也復制到本目錄下,
然后執行
@java -cp .\antlr-4.7.1-complete.jar;%CLASSPATH% org.antlr.v4.gui.TestRig HackAsm prog Add.asm
輸出結果為
0000000000000010
1110110000010000
0000000000000011
1110000010010000
0000000000000000
1110001100001000
經過比對和書中自帶的匯編編譯器產生的結果完全一致,說明測試通過
總結(jié)
以上是生活随笔為你收集整理的用AntlR4实现简单的汇编编译器的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 从“被动挖光缆”到“主动剪网线”,蚂蚁金
- 下一篇: 跨域错误的原因及处理方法