diff options
author | Toni Uhlig <matzeton@googlemail.com> | 2020-05-24 16:48:22 +0200 |
---|---|---|
committer | Toni Uhlig <matzeton@googlemail.com> | 2020-05-25 21:57:14 +0200 |
commit | 31c69b6ca1b91e7fd9fd8e14082fd2584c5f538c (patch) | |
tree | 16e789c7d68608831b498f41f54d9482b82a711a /source/distorm/prefix.c |
first public release
Signed-off-by: Toni Uhlig <matzeton@googlemail.com>
Diffstat (limited to 'source/distorm/prefix.c')
-rw-r--r-- | source/distorm/prefix.c | 368 |
1 files changed, 368 insertions, 0 deletions
diff --git a/source/distorm/prefix.c b/source/distorm/prefix.c
new file mode 100644
index 0000000..77cbeea
--- /dev/null
+++ b/source/distorm/prefix.c
@@ -0,0 +1,368 @@
+/*
+prefix.c
+
+diStorm3 - Powerful disassembler for X86/AMD64
+http://ragestorm.net/distorm/
+distorm at gmail dot com
+Copyright (C) 2003-2016 Gil Dabah
+This library is licensed under the BSD license. See the file COPYING.
+*/
+
+
+#include "prefix.h"
+
+#include "x86defs.h"
+#include "instructions.h"
+#include "distorm/mnemonics.h"
+
+
+/*
+ * The main purpose of this module is to keep track of all kinds of prefixes a single instruction may have.
+ * The problem is that a single instruction may have up to six different prefix-types.
+ * That's why I have to detect such cases and drop those excess prefixes.
+ */
+
+int prefixes_is_valid(unsigned int ch, _DecodeType dt)
+{
+	switch (ch) {
+		/* for i in xrange(0x40, 0x50): print "case 0x%2x:" % i */
+		case 0x40: /* REX: accepted as a prefix only when decoding 64 bits code. */
+		case 0x41:
+		case 0x42:
+		case 0x43:
+		case 0x44:
+		case 0x45:
+		case 0x46:
+		case 0x47:
+		case 0x48:
+		case 0x49:
+		case 0x4a:
+		case 0x4b:
+		case 0x4c:
+		case 0x4d:
+		case 0x4e:
+		case 0x4f: return (dt == Decode64Bits);
+		case PREFIX_LOCK: return TRUE;
+		case PREFIX_REPNZ: return TRUE;
+		case PREFIX_REP: return TRUE;
+		case PREFIX_CS: return TRUE;
+		case PREFIX_SS: return TRUE;
+		case PREFIX_DS: return TRUE;
+		case PREFIX_ES: return TRUE;
+		case PREFIX_FS: return TRUE;
+		case PREFIX_GS: return TRUE;
+		case PREFIX_OP_SIZE: return TRUE;
+		case PREFIX_ADDR_SIZE: return TRUE;
+		/* The VEXs might be false positives, prefixes_decode will determine for sure. */
+		case PREFIX_VEX2b: /* VEX is supported for all modes, because 16 bits Pmode is included. */
+		case PREFIX_VEX3b: return TRUE;
+	}
+	return FALSE;
+}
+
+/* Ignore a specific prefix type: flag the last recorded position of that type (if any) in unusedPrefixesMask. */
+void prefixes_ignore(_PrefixState* ps, _PrefixIndexer pi)
+{
+	/*
+	 * If that type of prefix appeared already, set the bit of that *former* prefix.
+	 * The index of the new prefix is not recorded here; prefixes_track_unused does that after calling us.
+	 */
+	if (ps->pfxIndexer[pi] != PFXIDX_NONE) ps->unusedPrefixesMask |= (1 << ps->pfxIndexer[pi]);
+}
+
+/* Ignore all prefixes: every prefix type with a recorded position gets flagged as unused. */
+void prefixes_ignore_all(_PrefixState* ps)
+{
+	int i;
+	for (i = 0; i < PFXIDX_MAX; i++)
+		prefixes_ignore(ps, i);
+}
+
+/* Calculates which prefixes weren't used, sets their bits in the unusedPrefixesMask and returns the updated mask. */
+uint16_t prefixes_set_unused_mask(_PrefixState* ps)
+{
+	/*
+	 * The decodedPrefixes represents the prefixes that were *read* from the binary stream for the instruction.
+	 * The usedPrefixes represents the prefixes that were actually used by the instruction in the *decode* phase.
+	 * Xoring between the two will result in a 'diff' which returns the prefixes that were read
+	 * from the stream *and* that were never used in the actual decoding.
+	 *
+	 * Only one prefix per type can be set in decodedPrefixes from the stream.
+	 * Therefore it's enough to check each type once and set the flag accordingly.
+	 * That's why we had to book-keep each prefix type and its position.
+	 * So now we know which bits we need to set exactly in the mask.
+	 */
+	_iflags unusedPrefixesDiff = ps->decodedPrefixes ^ ps->usedPrefixes;
+
+	/* Examine unused prefixes by type: */
+	/*
+	 * About REX: it might be set in the diff although it was never in the stream itself.
+	 * This is because the vrex is shared between VEX and REX and some places flag it as REX usage, while
+	 * we were really decoding an AVX instruction.
+	 * It's not a big problem, because the prefixes_ignore func will ignore it anyway,
+	 * since it wasn't seen earlier. But it's important to know this.
+	 */
+	if (unusedPrefixesDiff & INST_PRE_REX) prefixes_ignore(ps, PFXIDX_REX);
+	if (unusedPrefixesDiff & INST_PRE_SEGOVRD_MASK) prefixes_ignore(ps, PFXIDX_SEG);
+	if (unusedPrefixesDiff & INST_PRE_LOKREP_MASK) prefixes_ignore(ps, PFXIDX_LOREP);
+	if (unusedPrefixesDiff & INST_PRE_OP_SIZE) prefixes_ignore(ps, PFXIDX_OP_SIZE);
+	if (unusedPrefixesDiff & INST_PRE_ADDR_SIZE) prefixes_ignore(ps, PFXIDX_ADRS);
+	/* If a VEX instruction was found, its prefix is considered as used, therefore no point in checking for it. */
+
+	return ps->unusedPrefixesMask;
+}
+
+/*
+ * Mark the previous prefix of this type (if any) as unused, and bookkeep where we last saw this same type,
+ * because in the future we might want to disable it too.
+ */
+_INLINE_ void prefixes_track_unused(_PrefixState* ps, int index, _PrefixIndexer pi)
+{
+	prefixes_ignore(ps, pi);
+	/* Book-keep the current index for this type. */
+	ps->pfxIndexer[pi] = index;
+}
+
+/*
+ * Read as many prefixes as possible, up to 15 bytes, and halt when we encounter a non-prefix byte.
+ * This algorithm tries to imitate a real processor, where the same prefix can appear a few times, etc.
+ * The tiny complexity is that we want to know when a prefix was superfluous and mark any copy of it as unused.
+ * Note that the last prefix of its type will be considered as used, and all the others (of same type) before it as unused.
+ */
+void prefixes_decode(const uint8_t* code, int codeLen, _PrefixState* ps, _DecodeType dt)
+{
+	int index, done;
+	uint8_t vex;
+
+	/*
+	 * First thing to do, scan for prefixes, there are six types of prefixes.
+	 * There may be up to six prefixes before a single instruction, not the same type, no special order,
+	 * except REX/VEX must precede immediately the first opcode byte.
+	 * BTW - This is the reason why I didn't make the REP prefixes part of the instructions (STOS/SCAS/etc).
+	 *
+	 * Another thing, the instruction maximum size is 15 bytes, thus if we read more than 15 bytes, we will halt.
+	 *
+	 * We attach all prefixes to the next instruction, there might be two or more occurrences from the same prefix.
+	 * Also, since VEX can be allowed only once we will test it separately.
+	 */
+	for (index = 0, done = FALSE;
+	     (codeLen > 0) && (code - ps->start < INST_MAXIMUM_SIZE);
+	     code++, codeLen--, index++) {
+		/*
+		NOTE: AMD treats lock/rep as two different groups... But I am based on Intel.
+
+			- Lock and Repeat:
+				- 0xF0 - LOCK
+				- 0xF2 - REPNE/REPNZ
+				- 0xF3 - REP/REPE/REPZ
+			- Segment Override:
+				- 0x2E - CS
+				- 0x36 - SS
+				- 0x3E - DS
+				- 0x26 - ES
+				- 0x64 - FS
+				- 0x65 - GS
+			- Operand-Size Override: 0x66, switching default size.
+			- Address-Size Override: 0x67, switching default size.
+
+		64 Bits:
+			- REX: 0x40 - 0x4f, extends register access.
+			- 2 Bytes VEX: 0xc5
+			- 3 Bytes VEX: 0xc4
+		32 Bits:
+			- 2 Bytes VEX: 0xc5 11xx-xxxx (otherwise it is LDS)
+			- 3 Bytes VEX: 0xc4 11xx-xxxx (otherwise it is LES)
+		*/
+
+		/* Examine what type of prefix we got. */
+		switch (*code)
+		{
+			/* REX type, 64 bits decoding mode only: */
+			case 0x40:
+			case 0x41:
+			case 0x42:
+			case 0x43:
+			case 0x44:
+			case 0x45:
+			case 0x46:
+			case 0x47:
+			case 0x48:
+			case 0x49:
+			case 0x4a:
+			case 0x4b:
+			case 0x4c:
+			case 0x4d:
+			case 0x4e:
+			case 0x4f:
+				if (dt == Decode64Bits) {
+					ps->decodedPrefixes |= INST_PRE_REX;
+					ps->vrex = *code & 0xf; /* Keep only BXRW. */
+					ps->rexPos = code;
+					ps->prefixExtType = PET_REX;
+					prefixes_track_unused(ps, index, PFXIDX_REX);
+				} else done = TRUE; /* If we are not in 64 bits mode, it's an instruction, then halt. */
+			break;
+
+			/* LOCK and REPx type: */
+			case PREFIX_LOCK:
+				ps->decodedPrefixes |= INST_PRE_LOCK;
+				prefixes_track_unused(ps, index, PFXIDX_LOREP);
+			break;
+			case PREFIX_REPNZ:
+				ps->decodedPrefixes |= INST_PRE_REPNZ;
+				prefixes_track_unused(ps, index, PFXIDX_LOREP);
+			break;
+			case PREFIX_REP:
+				ps->decodedPrefixes |= INST_PRE_REP;
+				prefixes_track_unused(ps, index, PFXIDX_LOREP);
+			break;
+
+			/* Seg Override type: */
+			case PREFIX_CS:
+				ps->decodedPrefixes |= INST_PRE_CS;
+				prefixes_track_unused(ps, index, PFXIDX_SEG);
+			break;
+			case PREFIX_SS:
+				ps->decodedPrefixes |= INST_PRE_SS;
+				prefixes_track_unused(ps, index, PFXIDX_SEG);
+			break;
+			case PREFIX_DS:
+				ps->decodedPrefixes |= INST_PRE_DS;
+				prefixes_track_unused(ps, index, PFXIDX_SEG);
+			break;
+			case PREFIX_ES:
+				ps->decodedPrefixes |= INST_PRE_ES;
+				prefixes_track_unused(ps, index, PFXIDX_SEG);
+			break;
+			case PREFIX_FS:
+				ps->decodedPrefixes |= INST_PRE_FS;
+				prefixes_track_unused(ps, index, PFXIDX_SEG);
+			break;
+			case PREFIX_GS:
+				ps->decodedPrefixes |= INST_PRE_GS;
+				prefixes_track_unused(ps, index, PFXIDX_SEG);
+			break;
+
+			/* Op Size type: */
+			case PREFIX_OP_SIZE:
+				ps->decodedPrefixes |= INST_PRE_OP_SIZE;
+				prefixes_track_unused(ps, index, PFXIDX_OP_SIZE);
+			break;
+
+			/* Addr Size type: */
+			case PREFIX_ADDR_SIZE:
+				ps->decodedPrefixes |= INST_PRE_ADDR_SIZE;
+				prefixes_track_unused(ps, index, PFXIDX_ADRS);
+			break;
+
+			/* Non-prefix byte now, so break out of both the switch and the scanning loop. */
+			default: done = TRUE; break;
+		}
+		if (done) break;
+	}
+
+	/* 2 Bytes VEX: */
+	if ((codeLen >= 2) &&
+	    (*code == PREFIX_VEX2b) &&
+	    ((code - ps->start) <= INST_MAXIMUM_SIZE - 2)) {
+		/*
+		 * In 32 bits the second byte has to be in the special range of Mod=11.
+		 * Otherwise it might be a normal LDS instruction.
+		 */
+		if ((dt == Decode64Bits) || (*(code + 1) >= INST_DIVIDED_MODRM)) {
+			ps->vexPos = code + 1;
+			ps->decodedPrefixes |= INST_PRE_VEX;
+			ps->prefixExtType = PET_VEX2BYTES;
+
+			/*
+			 * VEX 1 byte bits:
+			 * |7-6--3-2-10|
+			 * |R|vvvv|L|pp|
+			 * |-----------|
+			 */
+
+			/* -- Convert from VEX prefix to VREX flags -- */
+			vex = *ps->vexPos;
+			if (~vex & 0x80 && dt == Decode64Bits) ps->vrex |= PREFIX_EX_R; /* Convert VEX.R (stored inverted in the prefix). */
+			if (vex & 4) ps->vrex |= PREFIX_EX_L; /* Convert VEX.L. */
+
+			code += 2; /* NOTE: codeLen is not adjusted here, only the code pointer advances. */
+		}
+	}
+
+	/* 3 Bytes VEX: */
+	if ((codeLen >= 3) &&
+	    (*code == PREFIX_VEX3b) &&
+	    ((code - ps->start) <= INST_MAXIMUM_SIZE - 3) &&
+	    (~ps->decodedPrefixes & INST_PRE_VEX)) { /* Only when no VEX was decoded already (e.g. the 2 bytes form above). */
+		/*
+		 * In 32 bits the second byte has to be in the special range of Mod=11.
+		 * Otherwise it might be a normal LES instruction.
+		 * And we don't care now about the 3rd byte.
+		 */
+		if ((dt == Decode64Bits) || (*(code + 1) >= INST_DIVIDED_MODRM)) {
+			ps->vexPos = code + 1;
+			ps->decodedPrefixes |= INST_PRE_VEX;
+			ps->prefixExtType = PET_VEX3BYTES;
+
+			/*
+			 * VEX first and second bytes:
+			 * |7-6-5-4----0| |7-6--3-2-10|
+			 * |R|X|B|m-mmmm| |W|vvvv|L|pp|
+			 * |------------| |-----------|
+			 */
+
+			/* -- Convert from VEX prefix to VREX flags -- */
+			vex = *ps->vexPos;
+			ps->vrex |= ((~vex >> 5) & 0x7); /* Shift and invert VEX.R/X/B (bits 7-5) down to bits 2-0. */
+			vex = *(ps->vexPos + 1);
+			if (vex & 4) ps->vrex |= PREFIX_EX_L; /* Convert VEX.L. */
+			if (vex & 0x80) ps->vrex |= PREFIX_EX_W; /* Convert VEX.W. */
+
+			/* Clear some flags if the mode isn't 64 bits. */
+			if (dt != Decode64Bits) ps->vrex &= ~(PREFIX_EX_B | PREFIX_EX_X | PREFIX_EX_R | PREFIX_EX_W);
+
+			code += 3;
+		}
+	}
+
+	/*
+	 * Save the address of the last byte scanned, so the decoder can keep on scanning from this point onwards.
+	 * In addition the decoder is able to know that the last byte could lead to MMX/SSE instructions (preceding REX if exists).
+	 */
+	ps->last = code; /* ps->last points to an opcode byte. */
+}
+
+/*
+ * For every memory-indirection operand we want to set its corresponding default segment.
+ * If the segment is being overridden, we need to see whether we use it or not.
+ * We will use it only if it's not the default one already.
+ */
+void prefixes_use_segment(_iflags defaultSeg, _PrefixState* ps, _DecodeType dt, _DInst* di)
+{
+	_iflags flags = 0;
+	if (dt == Decode64Bits) flags = ps->decodedPrefixes & INST_PRE_SEGOVRD_MASK64;
+	else flags = ps->decodedPrefixes & INST_PRE_SEGOVRD_MASK;
+
+	if ((flags == 0) || (flags == defaultSeg)) {
+		flags = defaultSeg;
+		di->segment |= SEGMENT_DEFAULT;
+	} else if (flags != defaultSeg) {
+		/* Use it only if it's a non-default segment; that marks the override prefix as used. */
+		ps->usedPrefixes |= flags;
+	}
+
+	/* ASSERT: R_XX must be below 128. */
+	switch (flags)
+	{
+		case INST_PRE_ES: di->segment |= R_ES; break;
+		case INST_PRE_CS: di->segment |= R_CS; break;
+		case INST_PRE_SS: di->segment |= R_SS; break;
+		case INST_PRE_DS: di->segment |= R_DS; break;
+		case INST_PRE_FS: di->segment |= R_FS; break;
+		case INST_PRE_GS: di->segment |= R_GS; break;
+	}
+
+	/* If it's one of the CS,SS,DS,ES and the mode is 64 bits, set the segment to none, since it's ignored. */
+	if ((dt == Decode64Bits) && (flags & INST_PRE_SEGOVRD_MASK32)) di->segment = R_NONE;
+} |