/* distorm.c diStorm3 C Library Interface diStorm3 - Powerful disassembler for X86/AMD64 http://ragestorm.net/distorm/ distorm at gmail dot com Copyright (C) 2003-2016 Gil Dabah This library is licensed under the BSD license. See the file COPYING. */ #include "distorm/distorm.h" #include "config.h" #include "decoder.h" #include "x86defs.h" #include "textdefs.h" #include "wstring.h" #include "distorm/mnemonics.h" #include "compat.h" /* C DLL EXPORTS */ #ifdef SUPPORT_64BIT_OFFSET _DLLEXPORT_ _DecodeResult distorm_decompose64(_CodeInfo* ci, _DInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount) #else _DLLEXPORT_ _DecodeResult distorm_decompose32(_CodeInfo* ci, _DInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount) #endif { if (usedInstructionsCount == NULL) { return DECRES_SUCCESS; } /* DECRES_SUCCESS still may indicate we may have something in the result, so zero it first thing. */ *usedInstructionsCount = 0; if ((ci == NULL) || (ci->codeLen < 0) || ((ci->dt != Decode16Bits) && (ci->dt != Decode32Bits) && (ci->dt != Decode64Bits)) || (ci->code == NULL) || (result == NULL) || ((ci->features & (DF_MAXIMUM_ADDR16 | DF_MAXIMUM_ADDR32)) == (DF_MAXIMUM_ADDR16 | DF_MAXIMUM_ADDR32))) { return DECRES_INPUTERR; } /* Assume length=0 is success. */ if (ci->codeLen == 0) { return DECRES_SUCCESS; } return decode_internal(ci, FALSE, result, maxInstructions, usedInstructionsCount); } #ifndef DISTORM_LIGHT /* Helper function to concatenate an explicit size when it's unknown from the operands. */ static void distorm_format_size(_WString* str, const _DInst* di, int opNum) { int isSizingRequired = 0; /* * We only have to output the size explicitly if it's not clear from the operands. * For example: * mov al, [0x1234] -> The size is 8, we know it from the AL register operand. * mov [0x1234], 0x11 -> Now we don't know the size. Pam pam pam * * If given operand number is higher than 2, then output the size anyways. */ isSizingRequired = ((opNum >= 2) || ((di->ops[0].type != O_REG) && (di->ops[1].type != O_REG))); /* Still not sure? Try some special instructions. */ if (!isSizingRequired) { /* * INS/OUTS are exception, because DX is a port specifier and not a real src/dst register. * A few exceptions that always requires sizing: * MOVZX, MOVSX, MOVSXD. * ROL, ROR, RCL, RCR, SHL, SHR, SAL, SAR. * SHLD, SHRD. */ switch (di->opcode) { case I_INS: case I_OUTS: case I_MOVZX: case I_MOVSX: case I_MOVSXD: case I_ROL: case I_ROR: case I_RCL: case I_RCR: case I_SHL: case I_SHR: case I_SAL: case I_SAR: case I_SHLD: case I_SHRD: isSizingRequired = 1; break; default: /* Instruction doesn't require sizing. */ break; } } if (isSizingRequired) { switch (di->ops[opNum].size) { case 0: break; /* OT_MEM's unknown size. */ case 8: strcat_WSN(str, "BYTE "); break; case 16: strcat_WSN(str, "WORD "); break; case 32: strcat_WSN(str, "DWORD "); break; case 64: strcat_WSN(str, "QWORD "); break; case 80: strcat_WSN(str, "TBYTE "); break; case 128: strcat_WSN(str, "DQWORD "); break; case 256: strcat_WSN(str, "YWORD "); break; default: /* Big oh uh if it gets here. */ break; } } } static void distorm_format_signed_disp(_WString* str, const _DInst* di, uint64_t addrMask) { int64_t tmpDisp64; if (di->dispSize) { chrcat_WS(str, ((int64_t)di->disp < 0) ? MINUS_DISP_CHR : PLUS_DISP_CHR); if ((int64_t)di->disp < 0) tmpDisp64 = -(int64_t)di->disp; else tmpDisp64 = di->disp; tmpDisp64 &= addrMask; str_code_hqw(str, (uint8_t*)&tmpDisp64); } } #ifdef SUPPORT_64BIT_OFFSET _DLLEXPORT_ void distorm_format64(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result) #else _DLLEXPORT_ void distorm_format32(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result) #endif { _WString* str; unsigned int i, isDefault; int64_t tmpDisp64; uint64_t addrMask = (uint64_t)-1; uint8_t segment; const _WMnemonic* mnemonic; /* Set address mask, when default is for 64bits addresses. */ if (ci->features & DF_MAXIMUM_ADDR32) addrMask = 0xffffffff; else if (ci->features & DF_MAXIMUM_ADDR16) addrMask = 0xffff; /* Copy other fields. */ result->size = di->size; result->offset = di->addr; if (di->flags == FLAG_NOT_DECODABLE) { str = &result->mnemonic; result->offset &= addrMask; strclear_WS(&result->operands); strcpy_WSN(str, "DB "); str_code_hb(str, di->imm.byte); strclear_WS(&result->instructionHex); str_hex_b(&result->instructionHex, di->imm.byte); return; /* Skip to next instruction. */ } str = &result->instructionHex; strclear_WS(str); /* Gotta have full address for (di->addr - ci->codeOffset) to work in all modes. */ for (i = 0; i < di->size; i++) str_hex_b(str, ci->code[(unsigned int)(di->addr - ci->codeOffset + i)]); /* Truncate address now. */ result->offset &= addrMask; str = &result->mnemonic; switch (FLAG_GET_PREFIX(di->flags)) { case FLAG_LOCK: strcpy_WSN(str, "LOCK "); break; case FLAG_REP: /* REP prefix for CMPS and SCAS is really a REPZ. */ if ((di->opcode == I_CMPS) || (di->opcode == I_SCAS)) strcpy_WSN(str, "REPZ "); else strcpy_WSN(str, "REP "); break; case FLAG_REPNZ: strcpy_WSN(str, "REPNZ "); break; default: /* Init mnemonic string, cause next touch is concatenation. */ strclear_WS(str); break; } mnemonic = (const _WMnemonic*)&_MNEMONICS[di->opcode]; COMPAT(memcpy)((int8_t*)&str->p[str->length], mnemonic->p, mnemonic->length + 1); str->length += mnemonic->length; /* Format operands: */ str = &result->operands; strclear_WS(str); /* Special treatment for String instructions. */ if ((META_GET_ISC(di->meta) == ISC_INTEGER) && ((di->opcode == I_MOVS) || (di->opcode == I_CMPS) || (di->opcode == I_STOS) || (di->opcode == I_LODS) || (di->opcode == I_SCAS))) { /* * No operands are needed if the address size is the default one, * and no segment is overridden, so add the suffix letter, * to indicate size of operation and continue to next instruction. */ if ((FLAG_GET_ADDRSIZE(di->flags) == ci->dt) && (SEGMENT_IS_DEFAULT(di->segment))) { str = &result->mnemonic; switch (di->ops[0].size) { case 8: chrcat_WS(str, 'B'); break; case 16: chrcat_WS(str, 'W'); break; case 32: chrcat_WS(str, 'D'); break; case 64: chrcat_WS(str, 'Q'); break; } return; } } for (i = 0; ((i < OPERANDS_NO) && (di->ops[i].type != O_NONE)); i++) { if (i > 0) strcat_WSN(str, ", "); switch (di->ops[i].type) { case O_REG: strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]); break; case O_IMM: /* If the instruction is 'push', show explicit size (except byte imm). */ if ((di->opcode == I_PUSH) && (di->ops[i].size != 8)) distorm_format_size(str, di, i); /* Special fix for negative sign extended immediates. */ if ((di->flags & FLAG_IMM_SIGNED) && (di->ops[i].size == 8)) { if (di->imm.sbyte < 0) { chrcat_WS(str, MINUS_DISP_CHR); str_code_hb(str, -di->imm.sbyte); break; } } if (di->ops[i].size == 64) str_code_hqw(str, (uint8_t*)&di->imm.qword); else str_code_hdw(str, di->imm.dword); break; case O_IMM1: str_code_hdw(str, di->imm.ex.i1); break; case O_IMM2: str_code_hdw(str, di->imm.ex.i2); break; case O_DISP: distorm_format_size(str, di, i); chrcat_WS(str, OPEN_CHR); if ((SEGMENT_GET(di->segment) != R_NONE) && !SEGMENT_IS_DEFAULT(di->segment)) { strcat_WS(str, (const _WString*)&_REGISTERS[SEGMENT_GET(di->segment)]); chrcat_WS(str, SEG_OFF_CHR); } tmpDisp64 = di->disp & addrMask; str_code_hqw(str, (uint8_t*)&tmpDisp64); chrcat_WS(str, CLOSE_CHR); break; case O_SMEM: distorm_format_size(str, di, i); chrcat_WS(str, OPEN_CHR); /* * This is where we need to take special care for String instructions. * If we got here, it means we need to explicitly show their operands. * The problem with CMPS and MOVS is that they have two(!) memory operands. * So we have to complete it ourselves, since the structure supplies only the segment that can be overridden. * And make the rest of the String operations explicit. */ segment = SEGMENT_GET(di->segment); isDefault = SEGMENT_IS_DEFAULT(di->segment); switch (di->opcode) { case I_MOVS: isDefault = FALSE; if (i == 0) segment = R_ES; break; case I_CMPS: isDefault = FALSE; if (i == 1) segment = R_ES; break; case I_INS: case I_LODS: case I_STOS: case I_SCAS: isDefault = FALSE; break; } if (!isDefault && (segment != R_NONE)) { strcat_WS(str, (const _WString*)&_REGISTERS[segment]); chrcat_WS(str, SEG_OFF_CHR); } strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]); distorm_format_signed_disp(str, di, addrMask); chrcat_WS(str, CLOSE_CHR); break; case O_MEM: distorm_format_size(str, di, i); chrcat_WS(str, OPEN_CHR); if ((SEGMENT_GET(di->segment) != R_NONE) && !SEGMENT_IS_DEFAULT(di->segment)) { strcat_WS(str, (const _WString*)&_REGISTERS[SEGMENT_GET(di->segment)]); chrcat_WS(str, SEG_OFF_CHR); } if (di->base != R_NONE) { strcat_WS(str, (const _WString*)&_REGISTERS[di->base]); chrcat_WS(str, PLUS_DISP_CHR); } strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]); if (di->scale != 0) { chrcat_WS(str, '*'); if (di->scale == 2) chrcat_WS(str, '2'); else if (di->scale == 4) chrcat_WS(str, '4'); else /* if (di->scale == 8) */ chrcat_WS(str, '8'); } distorm_format_signed_disp(str, di, addrMask); chrcat_WS(str, CLOSE_CHR); break; case O_PC: #ifdef SUPPORT_64BIT_OFFSET str_off64(str, (di->imm.sqword + di->addr + di->size) & addrMask); #else str_code_hdw(str, ((_OffsetType)di->imm.sdword + di->addr + di->size) & (uint32_t)addrMask); #endif break; case O_PTR: str_code_hdw(str, di->imm.ptr.seg); chrcat_WS(str, SEG_OFF_CHR); str_code_hdw(str, di->imm.ptr.off); break; } } if (di->flags & FLAG_HINT_TAKEN) strcat_WSN(str, " ;TAKEN"); else if (di->flags & FLAG_HINT_NOT_TAKEN) strcat_WSN(str, " ;NOT TAKEN"); } #ifdef SUPPORT_64BIT_OFFSET _DLLEXPORT_ _DecodeResult distorm_decode64(_OffsetType codeOffset, const unsigned char* code, int codeLen, _DecodeType dt, _DecodedInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount) #else _DLLEXPORT_ _DecodeResult distorm_decode32(_OffsetType codeOffset, const unsigned char* code, int codeLen, _DecodeType dt, _DecodedInst result[], unsigned int maxInstructions, unsigned int* usedInstructionsCount) #endif { _DecodeResult res; _DInst di; _CodeInfo ci; unsigned int instsCount = 0, i; *usedInstructionsCount = 0; /* I use codeLen as a signed variable in order to ease detection of underflow... and besides - */ if (codeLen < 0) { return DECRES_INPUTERR; } if ((dt != Decode16Bits) && (dt != Decode32Bits) && (dt != Decode64Bits)) { return DECRES_INPUTERR; } if (code == NULL || result == NULL) { return DECRES_INPUTERR; } /* Assume length=0 is success. */ if (codeLen == 0) { return DECRES_SUCCESS; } /* * We have to format the result into text. But the interal decoder works with the new structure of _DInst. * Therefore, we will pass the result array(!) from the caller and the interal decoder will fill it in with _DInst's. * Then we will copy each result to a temporary structure, and use it to reformat that specific result. * * This is all done to save memory allocation and to work on the same result array in-place!!! * It's a bit ugly, I have to admit, but worth it. */ ci.codeOffset = codeOffset; ci.code = code; ci.codeLen = codeLen; ci.dt = dt; ci.features = DF_NONE; if (dt == Decode16Bits) ci.features = DF_MAXIMUM_ADDR16; else if (dt == Decode32Bits) ci.features = DF_MAXIMUM_ADDR32; res = decode_internal(&ci, TRUE, (_DInst*)result, maxInstructions, &instsCount); for (i = 0; i < instsCount; i++) { if ((*usedInstructionsCount + i) >= maxInstructions) return DECRES_MEMORYERR; /* Copy the current decomposed result to a temp structure, so we can override the result with text. */ COMPAT(memcpy)(&di, (char*)result + (i * sizeof(_DecodedInst)), sizeof(_DInst)); #ifdef SUPPORT_64BIT_OFFSET distorm_format64(&ci, &di, &result[i]); #else distorm_format32(&ci, &di, &result[i]); #endif } *usedInstructionsCount = instsCount; return res; } #endif /* DISTORM_LIGHT */