项目的完整代码在 C2j-Compiler https://github.com/dejavudwh/C2j-Complier
前言
在上一篇解释完了一些基础的 Java 字节码指令后, 就可以正式进入真正的代码生成部分了. 但是这部分先说的是代码生成依靠的几个类, 也就是用来生成指令的操作.
这一篇用到的文件都在 codegen 下:
Directive.java
Instruction.java
CodeGenerator.java
ProgramGenerator.java
Directive.java
这个是枚举类, 用来生成一些比较特殊的指令
都生成像声明一个类或者一个方法的范围的指令, 比较简单.
- public enum Directive {
- CLASS_PUBLIC(".class public"),
- END_CLASS(".end class"),
- SUPER(".super"),
- FIELD_PRIVATE_STATIC(".field private static"),
- METHOD_STATIC(".method static"),
- METHOD_PUBLIC(".method public"),
- FIELD_PUBLIC(".field public"),
- METHOD_PUBBLIC_STATIC(".method public static"),
- END_METHOD(".end method"),
- LIMIT_LOCALS(".limit locals"),
- LIMIT_STACK(".limit stack"),
- VAR(".var"),
- LINE(".line");
- private String text;
- Directive(String text) {
- this.text = text;
- }
- public String toString() {
- return text;
- }
- }
Instruction.java
这也是一个枚举类, 用来生成一些基本的指令
- public enum Instruction {
- LDC("ldc"),
- GETSTATIC("getstatic"),
- SIPUSH("sipush"),
- IADD("iadd"),
- IMUL("imul"),
- ISUB("isub"),
- IDIV("idiv"),
- INVOKEVIRTUAL("invokevirtual"),
- INVOKESTATIC("invokestatic"),
- INVOKESPECIAL("invokespecial"),
- RETURN("return"),
- IRETURN("ireturn"),
- ILOAD("iload"),
- ISTORE("istore"),
- NEWARRAY("newarray"),
- NEW("new"),
- DUP("dup"),
- ASTORE("astore"),
- IASTORE("iastore"),
- ALOAD("aload"),
- PUTFIELD("putfield"),
- GETFIELD("getfield"),
- ANEWARRAY("anewarray"),
- AASTORE("aastore"),
- AALOAD("aaload"),
- IF_ICMPEG("if_icmpeq"),
- IF_ICMPNE("if_icmpne"),
- IF_ICMPLT("if_icmplt"),
- IF_ICMPGE("if_icmpge"),
- IF_ICMPGT("if_icmpgt"),
- IF_ICMPLE("if_icmple"),
- GOTO("goto"),
- IALOAD("iaload");
- private String text;
- Instruction(String s) {
- this.text = s;
- }
- public String toString() {
- return text;
- }
- }
CodeGenerator.java
重点来了, 生成的逻辑主要都在 CodeGenerator 和 ProgramGenerator 里, CodeGenerator 是 ProgramGenerator 的父类
CodeGenerator 的构造函数 new 了一个输出流, 用来输出字节码到 xxx.j 里
- public CodeGenerator() {
- String assemblyFileName = programName + ".j";
- try {
- bytecodeFile = new PrintWriter(new PrintStream(new
- File(assemblyFileName)));
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- }
- }
emit,emitString,emitDirective,emitBlankLine 都属于输出基本指令的方法, 都有多个重载方法来应对不一样操作和操作数. 需要注意的是, 有的指令可能需要先缓存起来, 在最后的时候一起提交, 比如 buffered,classDefine 就是用来判断是不是应该先缓存的布尔值
- public void emitString(String s) {
- if (buffered) {
- bufferedContent += s + "\n";
- return;
- }
- if (classDefine) {
- classDefinition += s + "\n";
- return;
- }
- bytecodeFile.print(s);
- bytecodeFile.flush();
- }
- public void emit(Instruction opcode) {
- if (buffered) {
- bufferedContent += "\t" + opcode.toString() + "\n";
- return;
- }
- if (classDefine) {
- classDefinition += "\t" + opcode.toString() + "\n";
- return;
- }
- bytecodeFile.println("\t" + opcode.toString());
- bytecodeFile.flush();
- ++instructionCount;
- }
- public void emitDirective(Directive directive, String operand1, String operand2, String operand3) {
- if (buffered) {
- bufferedContent += directive.toString() + "" + operand1 +" "+ operand2 +" "+ operand3 +"\n";
- return;
- }
- if (classDefine) {
- classDefinition += directive.toString() + "" + operand1 +" "+ operand2 +" "+ operand3 +"\n";
- return;
- }
- bytecodeFile.println(directive.toString() + "" + operand1 +" "+ operand2 +" " + operand3);
- ++instructionCount;
- }
- public void emitBlankLine() {
- if (buffered) {
- bufferedContent += "\n";
- return;
- }
- if (classDefine) {
- classDefinition += "\n";
- return;
- }
- bytecodeFile.println();
- bytecodeFile.flush();
- }
ProgramGenerator.java
ProgramGenerator 继承了 CodeGenerator, 也就是继承了一些基本的操作, 在上一篇像结构体, 数组的指令输出都在这个类里
处理嵌套
先看四个属性, 这四个属性主要是就来处理嵌套的分支和循环.
- private int branch_count = 0;
- private int branch_out = 0;
- private String embedded = "";
- private int loopCount = 0;
当没嵌套一个 ifelse 语句时候 embedded 属性就会加上一个字符'i', 而当退出一个分支的时候, 就把这个'i'切割掉
branch_count 和 branch_out 都用来标志相同作用域的分支跳转
也就是说如果有嵌套就用 embedded 来处理, 如果是用一个作用域的分支就用 branch_count 和 branch_out 来做标志
- public void incraseIfElseEmbed() {
- embedded += "i";
- }
- public void decraseIfElseEmbed() {
- embedded = embedded.substring(1);
- }
- public void emitBranchOut() {
- String s = "\n" + embedded + "branch_out" + branch_out + ":\n";
- this.emitString(s);
- branch_out++;
- }
loopCount 则是对嵌套循环的处理
- public void emitLoopBranch() {
- String s = "\n" + "loop" + loopCount + ":" + "\n";
- emitString(s);
- }
- public String getLoopBranch() {
- return "loop" + loopCount;
- }
- public void increaseLoopCount() {
- loopCount++;
- }
处理结构体
putStructToClassDeclaration 是定义结构体的, 也就是 new 一个类. declareStructAsClass 则是处理结构体里的变量, 也就是相当于处理类的属性
结构体如果已经类的定义的话, 就会加入 structNameList, 不要进行重复的定义
symbol.getValueSetter() 如果不是空的话就表明是一个结构体数组, 这样就直接从数组加载这个实例, 不用在堆栈上创建
declareStructAsClass 则是依照上一篇说的 Java 字节码有关类的指令来创建一个类
- public void putStructToClassDeclaration(Symbol symbol) {
- Specifier sp = symbol.getSpecifierByType(Specifier.STRUCTURE);
- if (sp == null) {
- return;
- }
- StructDefine struct = sp.getStruct();
- if (structNameList.contains(struct.getTag())) {
- return;
- } else {
- structNameList.add(struct.getTag());
- }
- if (symbol.getValueSetter() == null) {
- this.emit(Instruction.NEW, struct.getTag());
- this.emit(Instruction.DUP);
- this.emit(Instruction.INVOKESPECIAL, struct.getTag() + "/" + "<init>()V");
- int idx = this.getLocalVariableIndex(symbol);
- this.emit(Instruction.ASTORE, "" + idx);
- }
- declareStructAsClass(struct);
- }
- private void declareStructAsClass(StructDefine struct) {
- this.setClassDefinition(true);
- this.emitDirective(Directive.CLASS_PUBLIC, struct.getTag());
- this.emitDirective(Directive.SUPER, "java/lang/Object");
- Symbol fields = struct.getFields();
- do {
- String fieldName = fields.getName() + " ";
- if (fields.getDeclarator(Declarator.ARRAY) != null) {
- fieldName += "[";
- }
- if (fields.hasType(Specifier.INT)) {
- fieldName += "I";
- } else if (fields.hasType(Specifier.CHAR)) {
- fieldName += "C";
- } else if (fields.hasType(Specifier.CHAR) && fields.getDeclarator(Declarator.POINTER) != null) {
- fieldName += "Ljava/lang/String;";
- }
- this.emitDirective(Directive.FIELD_PUBLIC, fieldName);
- fields = fields.getNextSymbol();
- } while (fields != null);
- this.emitDirective(Directive.METHOD_PUBLIC, "<init>()V");
- this.emit(Instruction.ALOAD, "0");
- String superInit = "java/lang/Object/<init>()V";
- this.emit(Instruction.INVOKESPECIAL, superInit);
- fields = struct.getFields();
- do {
- this.emit(Instruction.ALOAD, "0");
- String fieldName = struct.getTag() + "/" + fields.getName();
- String fieldType = "";
- if (fields.hasType(Specifier.INT)) {
- fieldType = "I";
- this.emit(Instruction.SIPUSH, "0");
- } else if (fields.hasType(Specifier.CHAR)) {
- fieldType = "C";
- this.emit(Instruction.SIPUSH, "0");
- } else if (fields.hasType(Specifier.CHAR) && fields.getDeclarator(Declarator.POINTER) != null) {
- fieldType = "Ljava/lang/String;";
- this.emit(Instruction.LDC, " ");
- }
- String classField = fieldName + " " + fieldType;
- this.emit(Instruction.PUTFIELD, classField);
- fields = fields.getNextSymbol();
- } while (fields != null);
- this.emit(Instruction.RETURN);
- this.emitDirective(Directive.END_METHOD);
- this.emitDirective(Directive.END_CLASS);
- this.setClassDefinition(false);
- }
获取堆栈信息
其它有关 Java 字节码其实都是根据上一篇来完成的, 逻辑不复杂, 现在来看一个方法: getLocalVariableIndex, 这个方法是获取变量当前在队列里的位置的
先拿到当前执行的函数, 然后拿到函数的对应参数, 再反转 (这和参数压栈的顺序有关)
然后把当前符号对应作用域的符号都添加到列表里
之后遍历这个列表就可以算出这个符号对应在队列里的位置
- public int getLocalVariableIndex(Symbol symbol) {
- TypeSystem typeSys = TypeSystem.getInstance();
- String funcName = nameStack.peek();
- Symbol funcSym = typeSys.getSymbolByText(funcName, 0, "main");
- ArrayList<Symbol> localVariables = new ArrayList<>();
- Symbol s = funcSym.getArgList();
- while (s != null) {
- localVariables.add(s);
- s = s.getNextSymbol();
- }
- Collections.reverse(localVariables);
- ArrayList<Symbol> list = typeSys.getSymbolsByScope(symbol.getScope());
- for (int i = 0; i < list.size(); i++) {
- if (!localVariables.contains(list.get(i))) {
- localVariables.add(list.get(i));
- }
- }
- for (int i = 0; i < localVariables.size(); i++) {
- if (localVariables.get(i) == symbol) {
- return i;
- }
- }
- return -1;
- }
小结
这一篇主要是根据上一篇的 JVM 字节码来对不同的操作提供不同的方法来去输出这些指令
欢迎 Star!
来源: https://www.cnblogs.com/secoding/p/11388347.html