彻底弄懂dalvik字节码【二】
【一】中講到了最重要的dvmInterpret,繼續跟:
void dvmInterpret(Thread* self, const Method* method, JValue* pResult) {InterpSaveState interpSaveState;ExecutionSubModes savedSubModes;#if defined(WITH_JIT)/* Target-specific save/restore */double calleeSave[JIT_CALLEE_SAVE_DOUBLE_COUNT];/** If the previous VM left the code cache through single-stepping the* inJitCodeCache flag will be set when the VM is re-entered (for example,* in self-verification mode we single-step NEW_INSTANCE which may re-enter* the VM through findClassFromLoaderNoInit). Because of that, we cannot* assert that self->inJitCodeCache is NULL here.*/ #endif/** Save interpreter state from previous activation, linking* new to last.*/interpSaveState = self->interpSave;self->interpSave.prev = &interpSaveState;/** Strip out and save any flags that should not be inherited by* nested interpreter activation.*/savedSubModes = (ExecutionSubModes)(self->interpBreak.ctl.subMode & LOCAL_SUBMODE);if (savedSubModes != kSubModeNormal) {dvmDisableSubMode(self, savedSubModes);} #if defined(WITH_JIT)dvmJitCalleeSave(calleeSave); #endif#if defined(WITH_TRACKREF_CHECKS)self->interpSave.debugTrackedRefStart =dvmReferenceTableEntries(&self->internalLocalRefTable); #endifself->debugIsMethodEntry = true; #if defined(WITH_JIT)/* Initialize the state to kJitNot */self->jitState = kJitNot; #endif/** Initialize working state.** No need to initialize "retval".*/self->interpSave.method = method;self->interpSave.curFrame = (u4*) self->interpSave.curFrame;self->interpSave.pc = method->insns;assert(!dvmIsNativeMethod(method));/** Make sure the class is ready to go. 
Shouldn't be possible to get* here otherwise.*/if (method->clazz->status < CLASS_INITIALIZING ||method->clazz->status == CLASS_ERROR){ALOGE("ERROR: tried to execute code in unprepared class '%s' (%d)",method->clazz->descriptor, method->clazz->status);dvmDumpThread(self, false);dvmAbort();}typedef void (*Interpreter)(Thread*);Interpreter stdInterp;if (gDvm.executionMode == kExecutionModeInterpFast)stdInterp = dvmMterpStd; #if defined(WITH_JIT)else if (gDvm.executionMode == kExecutionModeJit ||gDvm.executionMode == kExecutionModeNcgO0 ||gDvm.executionMode == kExecutionModeNcgO1)stdInterp = dvmMterpStd; #endifelsestdInterp = dvmInterpretPortable;// Call the interpreter(*stdInterp)(self);*pResult = self->interpSave.retval;/* Restore interpreter state from previous activation */self->interpSave = interpSaveState; #if defined(WITH_JIT)dvmJitCalleeRestore(calleeSave); #endifif (savedSubModes != kSubModeNormal) {dvmEnableSubMode(self, savedSubModes);} }這個方法中先保存了前一個方法的狀態,然后初始化當前方法的狀態,比如設置pc指向方法的字節碼開始處等。然后調用dvmInterpretPortable開始解釋執行,執行完畢后,恢復了前一個方法的狀態。
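繼續跟dvmInterpretPortable(注意:從上面的代碼可以看到,當executionMode為kExecutionModeInterpFast,或者在開啟WITH_JIT時為kExecutionModeJit/kExecutionModeNcgO0/kExecutionModeNcgO1,stdInterp實際指向的是dvmMterpStd,只有其余情況才會走dvmInterpretPortable;這里以可移植解釋器dvmInterpretPortable為例進行分析):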
void dvmInterpretPortable(Thread* self) { #if defined(EASY_GDB)StackSaveArea* debugSaveArea = SAVEAREA_FROM_FP(self->interpSave.curFrame); #endifDvmDex* methodClassDex; // curMethod->clazz->pDvmDexJValue retval;/* core state */const Method* curMethod; // method we're interpretingconst u2* pc; // program counteru4* fp; // frame pointeru2 inst; // current instruction/* instruction decoding */u4 ref; // 16 or 32-bit quantity fetched directlyu2 vsrc1, vsrc2, vdst; // usually used for register indexes/* method call setup */const Method* methodToCall;bool methodCallRange;/* static computed goto table */DEFINE_GOTO_TABLE(handlerTable);/* copy state in */curMethod = self->interpSave.method;pc = self->interpSave.pc;fp = self->interpSave.curFrame;retval = self->interpSave.retval; /* only need for kInterpEntryReturn? */methodClassDex = curMethod->clazz->pDvmDex;LOGVV("threadid=%d: %s.%s pc=%#x fp=%p",self->threadId, curMethod->clazz->descriptor, curMethod->name,pc - curMethod->insns, fp);/** Handle any ongoing profiling and prep for debugging.*/if (self->interpBreak.ctl.subMode != 0) {TRACE_METHOD_ENTER(self, curMethod);self->debugIsMethodEntry = true; // Always true on startup}/** DEBUG: scramble this to ensure we're not relying on it.*/methodToCall = (const Method*) -1;#if 0if (self->debugIsMethodEntry) {ILOGD("|-- Now interpreting %s.%s", curMethod->clazz->descriptor,curMethod->name);DUMP_REGS(curMethod, self->interpSave.curFrame, false);} #endifFINISH(0); /* fetch and execute first instruction *//*--- start of opcodes ---*/細心的朋友在閱讀源碼的時候,可能會發現這個方法的方法體括號居然沒有閉合,這是有原因的,因為這里面有很多的宏定義,宏定義展開后,才是完整的方法體。
我們可以看到,這個方法直接從self->interpSave中取出dvmInterpret事先保存好的狀態(當前執行的method、pc以及棧幀指針curFrame等),同時聲明了若干局部變量:pc、fp、inst等。這些變量在后面分析的宏中被直接賦值和使用,所以在后面分析宏的時候,請留意這些變量。
第一個宏DEFINE_GOTO_TABLE:
#define DEFINE_GOTO_TABLE(_name) \static const void* _name[kNumPackedOpcodes] = { \/* BEGIN(libdex-goto-table); GENERATED AUTOMATICALLY BY opcode-gen */ \H(OP_NOP), \H(OP_MOVE), \H(OP_MOVE_FROM16), \H(OP_MOVE_16), \H(OP_MOVE_WIDE), \H(OP_MOVE_WIDE_FROM16), \H(OP_MOVE_WIDE_16), \H(OP_MOVE_OBJECT), \H(OP_MOVE_OBJECT_FROM16), \H(OP_MOVE_OBJECT_16), \H(OP_MOVE_RESULT), \H(OP_MOVE_RESULT_WIDE), \H(OP_MOVE_RESULT_OBJECT), \H(OP_MOVE_EXCEPTION), \H(OP_RETURN_VOID), \H(OP_RETURN), \H(OP_RETURN_WIDE), \H(OP_RETURN_OBJECT), \H(OP_CONST_4), \H(OP_CONST_16), \H(OP_CONST), \H(OP_CONST_HIGH16), \H(OP_CONST_WIDE_16), \H(OP_CONST_WIDE_32), \H(OP_CONST_WIDE), \H(OP_CONST_WIDE_HIGH16), \H(OP_CONST_STRING), \H(OP_CONST_STRING_JUMBO), \H(OP_CONST_CLASS), \H(OP_MONITOR_ENTER), \H(OP_MONITOR_EXIT), \H(OP_CHECK_CAST), \H(OP_INSTANCE_OF), \H(OP_ARRAY_LENGTH), \H(OP_NEW_INSTANCE), \H(OP_NEW_ARRAY), \H(OP_FILLED_NEW_ARRAY), \H(OP_FILLED_NEW_ARRAY_RANGE), \H(OP_FILL_ARRAY_DATA), \H(OP_THROW), \H(OP_GOTO), \H(OP_GOTO_16), \H(OP_GOTO_32), \H(OP_PACKED_SWITCH), \H(OP_SPARSE_SWITCH), \H(OP_CMPL_FLOAT), \H(OP_CMPG_FLOAT), \H(OP_CMPL_DOUBLE), \H(OP_CMPG_DOUBLE), \H(OP_CMP_LONG), \H(OP_IF_EQ), \H(OP_IF_NE), \H(OP_IF_LT), \H(OP_IF_GE), \H(OP_IF_GT), \H(OP_IF_LE), \H(OP_IF_EQZ), \H(OP_IF_NEZ), \H(OP_IF_LTZ), \H(OP_IF_GEZ), \H(OP_IF_GTZ), \H(OP_IF_LEZ), \H(OP_UNUSED_3E), \H(OP_UNUSED_3F), \H(OP_UNUSED_40), \H(OP_UNUSED_41), \H(OP_UNUSED_42), \H(OP_UNUSED_43), \H(OP_AGET), \H(OP_AGET_WIDE), \H(OP_AGET_OBJECT), \H(OP_AGET_BOOLEAN), \H(OP_AGET_BYTE), \H(OP_AGET_CHAR), \H(OP_AGET_SHORT), \H(OP_APUT), \H(OP_APUT_WIDE), \H(OP_APUT_OBJECT), \H(OP_APUT_BOOLEAN), \H(OP_APUT_BYTE), \H(OP_APUT_CHAR), \H(OP_APUT_SHORT), \H(OP_IGET), \H(OP_IGET_WIDE), \H(OP_IGET_OBJECT), \H(OP_IGET_BOOLEAN), \H(OP_IGET_BYTE), \H(OP_IGET_CHAR), \H(OP_IGET_SHORT), \H(OP_IPUT), \H(OP_IPUT_WIDE), \H(OP_IPUT_OBJECT), \H(OP_IPUT_BOOLEAN), \H(OP_IPUT_BYTE), \H(OP_IPUT_CHAR), \H(OP_IPUT_SHORT), \H(OP_SGET), \H(OP_SGET_WIDE), 
\H(OP_SGET_OBJECT), \H(OP_SGET_BOOLEAN), \H(OP_SGET_BYTE), \H(OP_SGET_CHAR), \H(OP_SGET_SHORT), \H(OP_SPUT), \H(OP_SPUT_WIDE), \H(OP_SPUT_OBJECT), \H(OP_SPUT_BOOLEAN), \H(OP_SPUT_BYTE), \H(OP_SPUT_CHAR), \H(OP_SPUT_SHORT), \H(OP_INVOKE_VIRTUAL), \H(OP_INVOKE_SUPER), \H(OP_INVOKE_DIRECT), \H(OP_INVOKE_STATIC), \H(OP_INVOKE_INTERFACE), \H(OP_UNUSED_73), \H(OP_INVOKE_VIRTUAL_RANGE), \H(OP_INVOKE_SUPER_RANGE), \H(OP_INVOKE_DIRECT_RANGE), \H(OP_INVOKE_STATIC_RANGE), \H(OP_INVOKE_INTERFACE_RANGE), \H(OP_UNUSED_79), \H(OP_UNUSED_7A), \H(OP_NEG_INT), \H(OP_NOT_INT), \H(OP_NEG_LONG), \H(OP_NOT_LONG), \H(OP_NEG_FLOAT), \H(OP_NEG_DOUBLE), \H(OP_INT_TO_LONG), \H(OP_INT_TO_FLOAT), \H(OP_INT_TO_DOUBLE), \H(OP_LONG_TO_INT), \H(OP_LONG_TO_FLOAT), \H(OP_LONG_TO_DOUBLE), \H(OP_FLOAT_TO_INT), \H(OP_FLOAT_TO_LONG), \H(OP_FLOAT_TO_DOUBLE), \H(OP_DOUBLE_TO_INT), \H(OP_DOUBLE_TO_LONG), \H(OP_DOUBLE_TO_FLOAT), \H(OP_INT_TO_BYTE), \H(OP_INT_TO_CHAR), \H(OP_INT_TO_SHORT), \H(OP_ADD_INT), \H(OP_SUB_INT), \H(OP_MUL_INT), \H(OP_DIV_INT), \H(OP_REM_INT), \H(OP_AND_INT), \H(OP_OR_INT), \H(OP_XOR_INT), \H(OP_SHL_INT), \H(OP_SHR_INT), \H(OP_USHR_INT), \H(OP_ADD_LONG), \H(OP_SUB_LONG), \H(OP_MUL_LONG), \H(OP_DIV_LONG), \H(OP_REM_LONG), \H(OP_AND_LONG), \H(OP_OR_LONG), \H(OP_XOR_LONG), \H(OP_SHL_LONG), \H(OP_SHR_LONG), \H(OP_USHR_LONG), \H(OP_ADD_FLOAT), \H(OP_SUB_FLOAT), \H(OP_MUL_FLOAT), \H(OP_DIV_FLOAT), \H(OP_REM_FLOAT), \H(OP_ADD_DOUBLE), \H(OP_SUB_DOUBLE), \H(OP_MUL_DOUBLE), \H(OP_DIV_DOUBLE), \H(OP_REM_DOUBLE), \H(OP_ADD_INT_2ADDR), \H(OP_SUB_INT_2ADDR), \H(OP_MUL_INT_2ADDR), \H(OP_DIV_INT_2ADDR), \H(OP_REM_INT_2ADDR), \H(OP_AND_INT_2ADDR), \H(OP_OR_INT_2ADDR), \H(OP_XOR_INT_2ADDR), \H(OP_SHL_INT_2ADDR), \H(OP_SHR_INT_2ADDR), \H(OP_USHR_INT_2ADDR), \H(OP_ADD_LONG_2ADDR), \H(OP_SUB_LONG_2ADDR), \H(OP_MUL_LONG_2ADDR), \H(OP_DIV_LONG_2ADDR), \H(OP_REM_LONG_2ADDR), \H(OP_AND_LONG_2ADDR), \H(OP_OR_LONG_2ADDR), \H(OP_XOR_LONG_2ADDR), \H(OP_SHL_LONG_2ADDR), \H(OP_SHR_LONG_2ADDR), 
\H(OP_USHR_LONG_2ADDR), \H(OP_ADD_FLOAT_2ADDR), \H(OP_SUB_FLOAT_2ADDR), \H(OP_MUL_FLOAT_2ADDR), \H(OP_DIV_FLOAT_2ADDR), \H(OP_REM_FLOAT_2ADDR), \H(OP_ADD_DOUBLE_2ADDR), \H(OP_SUB_DOUBLE_2ADDR), \H(OP_MUL_DOUBLE_2ADDR), \H(OP_DIV_DOUBLE_2ADDR), \H(OP_REM_DOUBLE_2ADDR), \H(OP_ADD_INT_LIT16), \H(OP_RSUB_INT), \H(OP_MUL_INT_LIT16), \H(OP_DIV_INT_LIT16), \H(OP_REM_INT_LIT16), \H(OP_AND_INT_LIT16), \H(OP_OR_INT_LIT16), \H(OP_XOR_INT_LIT16), \H(OP_ADD_INT_LIT8), \H(OP_RSUB_INT_LIT8), \H(OP_MUL_INT_LIT8), \H(OP_DIV_INT_LIT8), \H(OP_REM_INT_LIT8), \H(OP_AND_INT_LIT8), \H(OP_OR_INT_LIT8), \H(OP_XOR_INT_LIT8), \H(OP_SHL_INT_LIT8), \H(OP_SHR_INT_LIT8), \H(OP_USHR_INT_LIT8), \H(OP_IGET_VOLATILE), \H(OP_IPUT_VOLATILE), \H(OP_SGET_VOLATILE), \H(OP_SPUT_VOLATILE), \H(OP_IGET_OBJECT_VOLATILE), \H(OP_IGET_WIDE_VOLATILE), \H(OP_IPUT_WIDE_VOLATILE), \H(OP_SGET_WIDE_VOLATILE), \H(OP_SPUT_WIDE_VOLATILE), \H(OP_BREAKPOINT), \H(OP_THROW_VERIFICATION_ERROR), \H(OP_EXECUTE_INLINE), \H(OP_EXECUTE_INLINE_RANGE), \H(OP_INVOKE_OBJECT_INIT_RANGE), \H(OP_RETURN_VOID_BARRIER), \H(OP_IGET_QUICK), \H(OP_IGET_WIDE_QUICK), \H(OP_IGET_OBJECT_QUICK), \H(OP_IPUT_QUICK), \H(OP_IPUT_WIDE_QUICK), \H(OP_IPUT_OBJECT_QUICK), \H(OP_INVOKE_VIRTUAL_QUICK), \H(OP_INVOKE_VIRTUAL_QUICK_RANGE), \H(OP_INVOKE_SUPER_QUICK), \H(OP_INVOKE_SUPER_QUICK_RANGE), \H(OP_IPUT_OBJECT_VOLATILE), \H(OP_SGET_OBJECT_VOLATILE), \H(OP_SPUT_OBJECT_VOLATILE), \H(OP_UNUSED_FF), \/* END(libdex-goto-table) */ \};這個宏展開了就是定義了一個指針數組handlerTable,共256項,每一項對應dalvik的一個操作碼。
這個指針數組是在dvmInterpretPortable的函數體內被展開的,也就是說它是一個函數內部的靜態局部數組(聲明為static const)。指令的分派就是通過這張表完成的:取出操作碼之后,直接goto到表中對應的標簽地址。與為每條指令調用一個處理函數的傳統做法相比,省去了方法調用的棧構造,執行效率得到提升。但是這對編碼的要求就很高,從其中用到的大量宏就可以看出他們的深厚功底。
繼續分析宏H:
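# define H(_op) &&op_##_op
其中一元的&&不是邏輯與,而是GCC的"標簽作為值"(Labels as Values)擴展,用來取得一個標簽的地址(類型為void*);##是預處理器的記號粘貼(token pasting)運算符,把前后兩個記號拼成一個標識符,而不是字符串拼接。比如說H(OP_NOP)展開就是:&&op_OP_NOP,也就是標簽op_OP_NOP的地址(指針)。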
而op_OP_NOP又是通過另外一個宏HANDLE_OPCODE來定義的:
# define HANDLE_OPCODE(_op) op_##_op:
HANDLE_OPCODE(OP_NOP)展開就是:op_OP_NOP:。
注意最后的冒號,這表示它其實是一個位置標簽。
所以handlerTable就是一個由各個操作碼處理標簽的地址組成的數組。
回到dvmInterpretPortable,繼續分析宏FINISH:
# define FINISH(_offset) { \ADJUST_PC(_offset); \inst = FETCH(0); \if (self->interpBreak.ctl.subMode) { \dvmCheckBefore(pc, fp, self); \} \goto *handlerTable[INST_INST(inst)]; \} # define FINISH_BKPT(_opcode) { \goto *handlerTable[_opcode]; \}#define OP_END其中的宏ADJUST_PC:
#ifdef CHECK_BRANCH_OFFSETS # define ADJUST_PC(_offset) do { \int myoff = _offset; /* deref only once */ \if (pc + myoff < curMethod->insns || \pc + myoff >= curMethod->insns + dvmGetMethodInsnsSize(curMethod)) \{ \char* desc; \desc = dexProtoCopyMethodDescriptor(&curMethod->prototype); \ALOGE("Invalid branch %d at 0x%04x in %s.%s %s", \myoff, (int) (pc - curMethod->insns), \curMethod->clazz->descriptor, curMethod->name, desc); \free(desc); \dvmAbort(); \} \pc += myoff; \EXPORT_EXTRA_PC(); \} while (false) #else # define ADJUST_PC(_offset) do { \pc += _offset; \EXPORT_EXTRA_PC(); \} while (false) #endif其實就是將pc調整_offset個偏移量。
接下來就是宏FETCH:
#define FETCH(_offset) (pc[(_offset)])
inst = FETCH(0);就是從pc的0偏移處取出一個16位的代碼單元(兩個字節,對應前面的聲明:const u2* pc 和 u2 inst)存放到inst中。
然后通過宏INST_INST,得到該指令在handlerTable中的索引:
#define INST_INST(_inst) ((_inst) & 0xff)
也就是說,指令的低字節就是操作碼,同時也是它在handlerTable中的索引號。當獲取到索引號之后,就通過handlerTable跳轉到對應的代碼處開始執行。
前面我們知道,標簽是通過宏HANDLE_OPCODE來定義的。在dalvik/vm/mterp/c目錄下,每一個操作碼都有一個對應的文件,里面就是該操作碼的HANDLE_OPCODE標簽以及緊隨其后的處理代碼,也就是它的實現細節:
我們以OP_NOP為例分析一下:
HANDLE_OPCODE(OP_NOP)
FINISH(1);
OP_END
其邏輯就是啥也沒干:FINISH(1)先通過ADJUST_PC把pc向前移動1個代碼單元(pc是u2指針,即2個字節),再用FETCH(0)取出下一條指令,最后經handlerTable跳轉過去繼續執行。
ok,先到這里,下一篇以一個實際的例子來說明具體的解析過程。
作者:difcareer
鏈接:http://www.jianshu.com/p/90cef9026c9e
來源:簡書
著作權歸作者所有。商業轉載請聯系作者獲得授權,非商業轉載請注明出處。 與50位技術專家面對面20年技術見證,附贈技術全景圖
總結
以上是生活随笔為你收集整理的彻底弄懂dalvik字节码【二】的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 彻底弄懂dalvik字节码【一】
- 下一篇: 彻底弄懂dalvik字节码【三】