1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
|
/*
random notes:
REX: 0100wrxb
prefixes REX opc ModR/M SIB displacement immediate
1A/C: modrm stuff
i64: 32 only
o64: 64 only
CDEGMNPQRSUVW: modrm
EMQW: modrm w/ address
IJO: immediate
L: 8-bit immediate
VEX last byte 1:0: {none, 66, f3, f2}
*/
/* This is probably not the most efficient implementation, but hopefully good
* enough... */
#define REP4(x) x, x, x, x
#define REP8(x) REP4(x), REP4(x)
#define REP16(x) REP8(x), REP8(x)
#define I_8 0x01
#define I_16 0x02
#define I_24 0x03
#define I_32 0x04
#define I_v 0x05
#define I_z 0x06
#define I_p 0x07
#define I_IMM_MASK 0x07
#define I_MOD 0x08
#define I_ADDR 0x10
#define I_MODA (I_MOD|I_ADDR)
/* mutually exclusive types */
#define I_PFX 0x20 /* prefix */
#define I_JMP 0x40 /* execution does not continue after this */
#define I_SPEC 0x60 /* special case */
#define I_TYPE_MASK 0x60
#define I_JIMM (0x80|I_JMP) /* imm is jump offset */
#define I_BAD 0x80
#ifdef TARGET_x86_64
#define if64(_64, _32) _64
#else
#define if64(_64, _32) _32
#endif
#define i64(x) if64(I_BAD, x)
#define o64(x) if64(x, I_BAD)
/* Decode attributes for each one-byte opcode, indexed by the opcode byte.
 * The low bits give the immediate size, I_MOD/I_MODA whether a modrm byte
 * follows; I_SPEC entries (0f, d8-df, ff) are resolved by hand in P(dis). */
static const uint8_t onebyte_bits[] = {
/*00*/ REP4(I_MODA), I_8, I_z, i64(0), i64(0), REP4(I_MODA), I_8, I_z, i64(0), I_SPEC,
/*10*/ REP4(I_MODA), I_8, I_z, i64(0), i64(0), REP4(I_MODA), I_8, I_z, i64(0), i64(0),
/*20*/ REP4(I_MODA), I_8, I_z, I_PFX, i64(0), REP4(I_MODA), I_8, I_z, I_PFX, i64(0),
/*30*/ REP4(I_MODA), I_8, I_z, I_PFX, i64(0), REP4(I_MODA), I_8, I_z, I_PFX, i64(0),
/*40*/ REP16(if64(I_PFX, 0)), /* REX prefixes on 64-bit; inc/dec on 32-bit */
/*50*/ REP16(0),
/*60*/ i64(0), i64(0), i64(I_MOD), I_MODA, I_PFX, I_PFX, I_PFX, I_PFX,
/*68*/ I_z, I_MODA|I_z, I_8, I_MODA|I_8, REP4(0),
/*70*/ REP16(I_8|I_JIMM), /* jcc rel8 */
/* 81 is group 1 Ev,Iz (never a 64-bit imm) and 83 is group 1 Ev,Ib */
/*80*/ I_MODA|I_8, I_MODA|I_z, i64(I_MODA|I_8), I_MODA|I_8, I_MODA, I_MODA, I_MODA, I_MODA,
/*88*/ REP4(I_MODA), I_MODA, I_MOD, I_MODA, if64(I_PFX, I_MODA),
/*90*/ REP8(0), 0, 0, i64(I_p), 0, 0, 0, 0, 0,
/* NOTE(review): a0-a3 take a moffs (address-sized) operand; these entries
 * look too small for 64-bit mode - verify whether those opcodes matter here */
/*A0*/ I_8, I_v, I_8, I_v, REP4(0), I_8, I_z, 0, 0, 0, 0, 0, 0,
/*B0*/ REP8(I_8), REP8(I_v),
/*C0*/ I_MODA|I_8, I_MODA|I_8, I_16|I_JMP, I_JMP,
/*C4*/ if64(I_PFX, I_MODA), if64(I_PFX, I_MODA), I_MODA|I_8, I_MODA|I_z, /* c7 = mov Ev,Iz */
/*C8*/ I_24, 0, I_16|I_JMP, I_JMP, 0, I_8, i64(0), I_JMP,
/*D0*/ REP4(I_MODA), i64(I_8), i64(I_8), I_BAD, 0, REP8(I_SPEC),
/* don't treat ljmp as a jump for now */
/*E0*/ REP4(I_8|I_JIMM), REP4(I_8),
/*E8*/ (I_z|I_JIMM)&~I_JMP, I_z|I_JIMM, i64(I_p), I_8|I_JIMM, 0, 0, 0, 0,
/* NOTE(review): f6/f7 (group 3) carry an immediate for the test subops;
 * that is not represented here - verify */
/*F0*/ I_PFX, I_BAD, I_PFX, I_PFX, 0, 0, I_MODA, I_MODA,
/*F8*/ 0, 0, 0, 0, 0, 0, I_MODA, I_SPEC,
};
_Static_assert(sizeof(onebyte_bits) == 256, "onebyte_bits");
/* Note:
*All* currently defined 0f 38 opcodes are I_MODA. Assuming that any
unknown such opcodes are also I_MODA is probably better than generic
unknown.
Similarly, all defined 0f 3a opcodes are I_MODA|I_8.
*/
static const uint8_t _0f_bits[] = {
/*00*/ I_MODA, I_MODA, 0, 0, I_BAD, o64(0), 0, o64(0),
/*08*/ 0, 0, I_BAD, 0, 0, I_MODA, 0, 0,
/*10*/ REP8(I_MODA), I_MODA, I_BAD, I_BAD, I_BAD, I_BAD, I_BAD, I_BAD, I_MODA,
/*20*/ REP4(I_MOD), REP4(I_BAD), REP8(I_MODA),
/*30*/ 0, 0, 0, 0, 0, 0, I_BAD, 0, I_MODA, I_BAD, I_MODA|I_8, I_BAD, REP4(I_BAD),
/*40*/ REP16(I_MODA),
/*50*/ I_MOD, I_MODA, I_MODA, I_MODA, REP4(I_MODA), REP8(I_MODA),
/*60*/ REP16(I_MODA),
/*70*/ I_MODA, I_MOD|I_8, I_MOD|I_8, I_MOD|I_8, I_MODA, I_MODA, I_MODA, 0,
/*78*/ I_MODA, I_MODA, I_BAD, I_BAD, REP4(I_MODA),
/*80*/ REP16(I_z),
/*90*/ REP16(I_MODA),
/*Ax*/ 0, 0, 0, 0, 0, 0, I_BAD, I_BAD,
/*A8*/ 0, 0, 0, I_MODA, I_MODA|I_8, I_MODA, I_MODA, I_MODA,
/*B0*/ REP8(I_MODA), I_MODA, 0, I_MODA|I_8, I_MODA, REP4(I_MODA),
/*C0*/ I_MODA, I_MODA, I_MODA|I_8, I_MODA, I_MODA|I_8, I_MOD|I_8, I_MODA|I_8, I_MODA|I_z,
/*C8*/ REP8(0),
/*D0*/ REP4(I_MODA), I_MODA, I_MODA, I_MODA, I_MOD, REP8(I_MODA),
/*E0*/ REP16(I_MODA),
/*F0*/ REP4(I_MODA), I_MODA, I_MODA, I_MODA, I_MOD,
/*F8*/ REP4(I_MODA), I_MODA, I_MODA, I_MODA, I_BAD,
};
_Static_assert(sizeof(_0f_bits) == 256, "_0f_bits");
/* Disassemble one instruction at ctx->ptr, advancing ctx->ptr past it and
 * setting ctx->op_size.  Exactly one callback is invoked:
 *   P(bad)          - undecodable opcode
 *   P(branch)       - pc-relative jump/call; imm is the branch offset
 *   P(pcrel)        - RIP-relative memory operand (64-bit only)
 *   P(ret)          - control flow does not continue (ret, jmp r/m, ...)
 *   P(unidentified) - anything else
 * If the client requests modification (TDIS_CTX_MODIFY), the instruction is
 * copied to TDIS_CTX_NEWOP with the immediate/modrm rewritten. */
static void P(dis)(tdis_ctx ctx) {
    const uint8_t *orig = ctx->ptr;
    const uint8_t *ptr = ctx->ptr;
    int opnd_size = 4; /* 2 after 0x66 prefix, 8 after REX.W */
    int mod = 0, rm = 0; /* modrm fields; rm bit 3 comes from REX/VEX.B */
restart:;
    uint8_t byte1 = *ptr++;
    uint8_t bits = onebyte_bits[byte1];
    /* printf("b1=%x bytes=%x\n", byte1, bits); */
    if ((bits & I_TYPE_MASK) == I_SPEC) {
        if (byte1 == 0x0f) {
            uint8_t byte2 = *ptr++;
            bits = _0f_bits[byte2];
        } else if ((byte1 & 0xf8) == 0xd8) {
            /* x87 ESC: the modrm byte selects the operation; let the regular
             * modrm parsing below consume it (no immediate follows) */
            bits = I_MODA;
        } else if (byte1 == 0xff) {
            /* group 5: inc/dec/call/jmp/push on r/m; always modrm,
             * regardless of whether the operand is a register or memory */
            uint8_t modrm = *ptr;
            int subop = modrm >> 3 & 7;
            if (subop == 4 || subop == 5) /* JMP near/far */
                bits = I_JMP | I_MODA;
            else
                bits = I_MODA;
        } else {
            __builtin_abort();
        }
    }
got_bits: UNUSED
    if (bits == I_BAD)
        return P(bad)(ctx);
    if ((bits & I_TYPE_MASK) == I_PFX) {
        if (byte1 == 0x66) {
            opnd_size = 2;
            goto restart;
#ifdef TARGET_x86_64
        } else if ((byte1 & 0xf0) == 0x40) { /* REX */
            if (byte1 & 8) /* REX.W: 64-bit operand size */
                opnd_size = 8;
            if (byte1 & 1) /* REX.B: extends modrm.rm */
                rm = 8;
            goto restart;
        } else if (byte1 == 0xc4) { /* VEX 3-byte */
            uint8_t byte2 = *ptr++;
            if (!(byte2 & 0x20)) /* VEX.~B */
                rm = 8;
            UNUSED uint8_t byte3 = *ptr++; /* W.vvvv.L.pp */
            uint8_t opc = *ptr++; /* opcode byte within the selected map */
            int map = byte2 & 0x1f;
            switch (map) {
            case 1: /* 0f map */
                bits = _0f_bits[opc];
                break;
            case 2: /* 0f 38 map: all defined opcodes are I_MODA */
                bits = _0f_bits[0x38];
                break;
            case 3: /* 0f 3a map: all defined opcodes are I_MODA|I_8 */
                bits = _0f_bits[0x3a];
                break;
            default:
                bits = I_BAD;
                break;
            }
            goto got_bits;
        } else if (byte1 == 0xc5) { /* VEX 2-byte */
            ptr++; /* skip the R.vvvv.L.pp payload byte */
            bits = _0f_bits[*ptr++]; /* opcode, implicitly in the 0f map */
            goto got_bits;
        } else if (byte1 == 0x8f) { /* XOP (AMD only) */
            uint8_t byte2 = *ptr;
            /* 8f /0 is pop r/m, not XOP; peek at the would-be modrm */
            if ((byte2 >> 3 & 7) == 0)
                goto modrm;
            ptr++; /* ok, definitely XOP */
            if (!(byte2 & 0x20)) /* XOP.~B */
                rm = 8;
            ptr += 2; /* skip the W.vvvv.L.pp byte and the opcode byte */
            int map = byte2 & 0x1f;
            switch (map) {
            case 8:
                bits = I_MODA|I_8;
                break;
            case 9:
                bits = I_MODA;
                break;
            case 10:
                bits = I_MODA|I_32;
                break;
            default:
                bits = I_BAD;
                break;
            }
            goto got_bits;
#endif
        } else {
            /* segment/rep/lock/addr-size prefix: no effect on the length
             * computation, just skip it */
            goto restart;
        }
    }
    UNUSED int modrm_off = ptr - orig;
    if (bits & I_MOD) {
    modrm: UNUSED;
        uint8_t modrm = *ptr++;
        mod = modrm >> 6;
        rm |= modrm & 7;
        if ((rm & 7) == 4) {
            /* SIB byte follows (r/m=100 regardless of REX.B);
             * with mod=0, a SIB base of 5 adds a disp32 */
            uint8_t sib = *ptr++;
            if (mod == 0 && (sib & 7) == 5)
                ptr += 4;
        } else if (mod == 0 && (rm & 7) == 5) {
            /* disp32; RIP-relative in 64-bit mode (REX.B is ignored for
             * this encoding) */
            ptr += 4;
        }
        if (mod == 1)
            ptr++; /* disp8 */
        else if (mod == 2)
            ptr += 4; /* disp32 */
    }
    int imm_off = ptr - orig;
    int imm_bits = bits & I_IMM_MASK;
    int imm_size;
    if (imm_bits <= I_32)
        imm_size = imm_bits; /* I_8..I_32 directly encode the byte count */
    else if (imm_bits == I_v)
        imm_size = opnd_size;
    else if (imm_bits == I_z)
        imm_size = opnd_size == 2 ? 2 : 4;
    else if (imm_bits == I_p)
        imm_size = opnd_size == 2 ? 4 : 6; /* 16-bit segment + offset */
    else /* because GCC is stupid */
        __builtin_abort();
    ptr += imm_size;
    ctx->ptr = ptr;
    ctx->op_size = ptr - orig;
    /* printf("bits=%x\n", bits); */
    if (bits & (I_JIMM & ~I_JMP)) {
        /* imm is a jump/call offset relative to the next instruction;
         * the flag bit alone is tested so that call (I_JMP clear, since
         * execution continues) is included */
        int32_t imm;
        const uint8_t *imm_ptr = orig + imm_off;
        switch (imm_size) {
        case 1: imm = *(int8_t *) imm_ptr; break;
        case 2: { int16_t imm16; memcpy(&imm16, imm_ptr, 2); imm = imm16; break; }
        case 4: memcpy(&imm, imm_ptr, 4); break;
        default: __builtin_abort();
        }
        /* 70-7f and 0f 80-8f are conditional; e8-eb (call/jmp) are not */
        bool cond = (byte1 & 0xf0) != 0xe0;
        bool call = !(bits & I_JMP);
        P(branch)(ctx, ctx->pc + ctx->op_size + imm,
                  cond * CC_CONDITIONAL | call * CC_CALL);
        if (TDIS_CTX_MODIFY(ctx)) {
            /* newval[0] should be the new immediate */
            int32_t new_imm = TDIS_CTX_NEWVAL(ctx, 0);
            uint8_t *new_op = TDIS_CTX_NEWOP(ctx);
            memcpy(new_op, orig, ctx->op_size);
            uint8_t *new_imm_ptr = new_op + imm_off;
            switch (imm_size) {
            case 1: *(int8_t *) new_imm_ptr = new_imm; break;
            case 2: { int16_t imm16 = new_imm; memcpy(new_imm_ptr, &imm16, 2); break; }
            case 4: memcpy(new_imm_ptr, &new_imm, 4); break;
            }
        }
#ifdef TARGET_x86_64
    } else if ((bits & I_MODA) == I_MODA && mod == 0 && (rm & 7) == 5) {
        /* RIP-relative operand: r/m=101 excludes SIB, so the disp32 is
         * the 4 bytes immediately after the modrm byte */
        int32_t disp;
        memcpy(&disp, orig + modrm_off + 1, 4);
        /* unlike ARM, we can always switch to non-pcrel without making the
         * instruction from scratch, so we don't have 'reg' and 'lm' */
        P(pcrel)(ctx, ctx->pc + ctx->op_size + disp);
        if (TDIS_CTX_MODIFY(ctx)) {
            uint8_t *new_op = TDIS_CTX_NEWOP(ctx);
            memcpy(new_op, orig, ctx->op_size);
            /* newval[0] should be the new register, which should be one that
             * fits in r/m directly since that's all I need;
             * newval[1] should be the new displacement */
            uint8_t *new_modrm_ptr = new_op + modrm_off;
            /* mod=2: [reg]+disp32 */
            *new_modrm_ptr = (*new_modrm_ptr & ~0xc7) | 2 << 6 | TDIS_CTX_NEWVAL(ctx, 0);
            uint32_t new_disp = TDIS_CTX_NEWVAL(ctx, 1);
            memcpy(new_modrm_ptr + 1, &new_disp, 4);
        }
#endif
    } else if ((bits & I_TYPE_MASK) == I_JMP) {
        /* ret / jmp r/m / iret: flow does not continue past here */
        P(ret)(ctx);
    } else {
        P(unidentified)(ctx);
    }
}
|